framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,fp8,0,19.82419713338216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,fp8,0,19.82425053914388
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,float16,0,21.55480448404948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,float16,0,21.558436075846355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,fp8,0,19.825108846028645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,fp8,0,19.84228769938151
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,float16,0,21.561968485514324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,float16,0,21.55738576253255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,fp8,0,10.813514709472656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,fp8,0,10.679434458414713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,float16,0,10.866325378417969
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,fp8,0,9.9857546488444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,float16,0,10.847338358561197
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,float16,0,15.366383870442709
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,float16,0,15.10097630818685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,fp8,0,9.971104303995768
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,fp8,0,5.052159945170085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,float16,0,7.64132817586263
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,float16,0,5.499173482259114
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,fp8,0,5.604074478149414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,fp8,0,5.363333384195964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,fp8,0,9.98086929321289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,float16,0,7.534554799397786
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,float16,0,13.373967488606771
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,float16,0,5.488741556803386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,float16,0,5.487983703613281
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,float16,0,3.831658681233724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,fp8,0,5.395845413208008
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,fp8,0,5.758602778116862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,fp8,0,2.5864532788594565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,float16,0,2.819375991821289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,fp8,0,2.841562589009603
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,float16,0,3.457184155782064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,fp8,0,2.7426878611246743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,fp8,0,2.75872008005778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,float16,0,2.8623412450154624
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,fp8,0,2.7468694051106772
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,float16,0,3.5766773223876953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,fp8,0,11.378421783447266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,float16,0,12.366811116536459
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,fp8,0,11.385562896728516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,float16,0,12.466373443603516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,float16,0,12.370282491048178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,fp8,0,11.995919545491537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,float16,0,12.381050109863281
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,fp8,0,5.746725082397461
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,float16,0,6.25430425008138
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,fp8,0,6.168607711791992
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,fp8,0,6.148997624715169
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,float16,0,8.136245091756185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,fp8,0,11.38650131225586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,float16,0,8.780149459838867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,float16,0,8.844117482503256
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,float16,0,3.1675519943237305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,fp8,0,6.1233170827229815
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,fp8,0,5.738992055257161
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,fp8,0,3.5969759623209634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,float16,0,3.217749277750651
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,fp8,0,3.030154546101888
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,float16,0,4.178106625874837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,fp8,0,2.9118881225585938
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,fp8,0,2.9136746724446616
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,float16,0,3.203536033630371
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,float16,0,1.6302560170491536
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,fp8,0,3.110746701558431
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,float16,0,1.6447839736938477
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,fp8,0,1.6293813387552898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,fp8,0,1.500912030537923
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,float16,0,2.0330026944478354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,float16,0,2.042090733846029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,float16,0,6.235685348510742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,fp8,0,1.6008000373840332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,fp8,0,1.5046826998392742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,float16,0,3.176133473714193
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,fp8,0,1.6032640139261882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,float16,0,1.6636959711710613
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,float16,0,8.725034713745117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,fp8,0,8.788986841837565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,fp8,0,8.030597050984701
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,float16,0,8.72543462117513
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,fp8,0,8.030789057413736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,fp8,0,4.053093274434407
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,float16,0,5.457792282104492
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,float16,0,8.73092269897461
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,fp8,0,4.066106796264648
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,float16,0,4.42252254486084
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,float16,0,8.75166384379069
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,float16,0,4.4058027267456055
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,fp8,0,8.043941497802734
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,fp8,0,4.05354658762614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,fp8,0,4.355274518330892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,float16,0,2.7639039357503257
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,float16,0,5.92301877339681
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,fp8,0,4.053898811340332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,fp8,0,2.06986665725708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,float16,0,2.253925323486328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,float16,0,5.896042505900065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,fp8,0,2.3145012855529785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,fp8,0,2.065626621246338
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,float16,0,2.729658762613932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,float16,0,2.246485392252604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,fp8,0,2.208591938018799
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,float16,0,1.16267196337382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,fp8,0,1.1448480288187664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,float16,0,1.1649866898854573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,fp8,0,1.1259146531422932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,float16,0,1.2083360354105632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,float16,0,1.1650826930999756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,fp8,0,1.07259202003479
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,fp8,0,2.0652586619059243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,float16,0,1.1699786980946858
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,fp8,0,1.088597297668457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,float16,0,2.245120048522949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,fp8,0,1.0702293713887532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,fp8,0,10.461530685424805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,float16,0,11.359967549641928
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,fp8,0,10.470677057902018
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,float16,0,12.383108774820963
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,fp8,0,10.463274637858072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,float16,0,11.366063435872396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,float16,0,5.720090866088867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,float16,0,5.71675173441569
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,fp8,0,5.642549514770508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,fp8,0,5.909685134887695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,float16,0,11.366154988606771
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,fp8,0,5.279568036397298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,fp8,0,10.463850657145182
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,float16,0,5.746421178181966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,float16,0,3.5764640172322593
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,fp8,0,2.855050722757975
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,fp8,0,2.6715466181437173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,fp8,0,5.262666702270508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,float16,0,2.907925287882487
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,float16,0,8.185125350952148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,float16,0,3.6812426249186196
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,fp8,0,2.8554185231526694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,fp8,0,2.663263956705729
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,float16,0,3.6029227574666343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,float16,0,3.1127894719441733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,float16,0,1.4823892911275227
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,fp8,0,1.4982239405314128
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,float16,0,5.720298767089844
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,fp8,0,3.803765296936035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,fp8,0,1.3635466893513997
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,fp8,0,1.3672000567118328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,float16,0,1.8418560028076172
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,float16,0,1.5322240193684895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,float16,0,1.556613286336263
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,fp8,0,5.6979414621988935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,fp8,0,1.462048053741455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,float16,0,1.5633653004964192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,fp8,0,1.3636479377746582
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,fp8,0,0.7776266733805338
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,float16,0,0.9524640242258707
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,fp8,0,0.7132213115692139
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,float16,0,0.9374293486277262
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,float16,0,0.8455626964569092
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,fp8,0,0.726149320602417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,float16,0,0.7868586381276449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,float16,0,0.7810613314310709
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,fp8,0,6.455151875813802
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,fp8,0,6.108160018920898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,float16,0,7.3121490478515625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,fp8,0,6.102591832478841
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,float16,0,6.6256052652994795
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,float16,0,9.27407455444336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,fp8,0,0.7659893035888672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,float16,0,3.3385492960611978
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,float16,0,4.328847885131836
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,fp8,0,4.171237309773763
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,fp8,0,3.0897865295410156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,float16,0,6.62722651163737
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,float16,0,3.415226618448893
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,fp8,0,0.7133333683013916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,fp8,0,3.2983786265055337
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,fp8,0,3.3359785079956055
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,float16,0,4.506239891052246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,float16,0,3.3487892150878906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,fp8,0,3.301952044169108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,float16,0,1.7140746116638184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,float16,0,2.0709226926167807
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,fp8,0,1.6883093516031902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,float16,0,2.309429327646891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,fp8,0,1.674085299173991
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,float16,0,1.698560078938802
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,fp8,0,1.5639626185099285
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,float16,0,1.8135466575622559
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,float16,0,1.06822935740153
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,fp8,0,6.104186375935872
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,fp8,0,0.8650133609771729
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,float16,0,1.0587573051452637
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,float16,0,1.0810933113098145
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,fp8,0,0.8105599880218506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,float16,0,0.9427306652069092
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,fp8,0,0.8643733660380045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,fp8,0,0.8684960206349691
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,float16,0,0.8871946334838867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,float16,0,0.465338667233785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,fp8,0,0.4620213508605957
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,float16,0,0.46860265731811523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,fp8,0,1.5705119768778484
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,fp8,0,0.47329068183898926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,float16,0,0.4919039805730184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,float16,0,0.46750398476918537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,fp8,0,1.5684159596761067
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,fp8,0,0.46088000138600665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,float16,0,0.5035839875539144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,fp8,0,0.8635679880777994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,fp8,0,0.42956264813741046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,float16,0,6.260992050170898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,float16,0,6.262778600056966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,fp8,0,6.191141128540039
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,fp8,0,0.42847466468811035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,float16,0,6.265418370564778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,fp8,0,5.783018747965495
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,float16,0,3.875685373942057
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,fp8,0,3.1118507385253906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,fp8,0,2.9291305541992188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,float16,0,6.2721811930338545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,float16,0,3.21834659576416
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,fp8,0,6.216581344604492
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,fp8,0,6.035055796305339
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,float16,0,3.3937387466430664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,fp8,0,3.3571945826212564
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,fp8,0,2.907909393310547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,float16,0,3.15559450785319
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,float16,0,1.9893652598063152
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,fp8,0,1.485354741414388
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,fp8,0,3.117781321207682
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,fp8,0,1.620848019917806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,float16,0,1.634335994720459
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,fp8,0,1.5755732854207356
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,float16,0,1.9946986834208171
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,fp8,0,1.524778683980306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,float16,0,1.7063520749409993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,fp8,0,1.5825440088907878
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,float16,0,1.0086560249328613
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,fp8,0,0.7539733250935873
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,float16,0,0.8655146757761637
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,fp8,0,0.7602826754252116
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,fp8,0,0.8063946564992269
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,float16,0,0.9944427013397217
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,float16,0,0.8194826443990072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,fp8,0,0.8068959712982178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,float16,0,0.45981331666310626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,fp8,0,0.42510398228963214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,fp8,0,0.42321598529815674
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,float16,0,1.5963466962178547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,fp8,0,0.42478398482004803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,float16,0,0.47387198607126874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,fp8,0,0.42633068561553955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,float16,0,0.46888534228007
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,fp8,0,0.4246346553166707
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,float16,0,0.4307733376820882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,float16,0,0.271178662776947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,fp8,0,0.23286932706832886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,float16,0,0.9936959743499756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,fp8,0,0.7538293202718099
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,fp8,0,0.23206400871276855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,fp8,0,0.22724799315134683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,fp8,0,0.2310346762339274
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,float16,0,0.26074665784835815
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,float16,0,0.23643734057744345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,fp8,0,0.21843733390172324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,float16,0,0.4589333136876424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,float16,0,3.7465438842773438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,fp8,0,3.7038132349650064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,fp8,0,3.4642985661824546
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,float16,0,0.2704533338546753
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,float16,0,3.7507521311442056
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,float16,0,3.9429601033528647
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,fp8,0,3.462970733642578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,fp8,0,3.4729601542154946
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,float16,0,3.7574774424235025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,float16,0,2.312431971232096
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,fp8,0,1.7628480593363445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,float16,0,1.9294346173604329
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,fp8,0,1.8625706036885579
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,float16,0,4.664650599161784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,fp8,0,1.8652480443318684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,float16,0,2.317018667856852
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,fp8,0,1.7467145919799805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,float16,0,2.361135959625244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,float16,0,0.2590666611989339
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,fp8,0,1.7469654083251953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,fp8,0,0.9470880031585693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,float16,0,1.1794933478037517
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,fp8,0,0.8969973723093668
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,fp8,0,0.8889760176340739
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,float16,0,0.961845318476359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,fp8,0,0.8881653149922689
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,float16,0,0.9808533191680908
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,float16,0,1.170624017715454
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,float16,0,0.5903573433558146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,fp8,0,0.9470132986704508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,fp8,0,0.49006934960683185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,fp8,0,0.49029866854349774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,float16,0,0.5398240089416504
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,float16,0,2.372975985209147
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,fp8,0,0.47121067841847736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,float16,0,0.5272159973780314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,fp8,0,0.48692798614501953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,float16,0,0.26633065938949585
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,float16,0,1.282922665278117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,fp8,0,0.24437334140141806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,float16,0,0.2979360024134318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,fp8,0,0.27082665761311847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,fp8,0,0.2444960077603658
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,float16,0,0.30408533414204914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,fp8,0,0.2642880082130432
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,float16,0,0.33002134164174396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,fp8,0,0.26209600766499835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,float16,0,0.2669919927914937
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,float16,0,0.1637493371963501
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,fp8,0,0.14850667119026184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,float16,0,0.16726932922999063
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,fp8,0,0.14677866299947104
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,float16,0,0.6037919918696085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,float16,0,0.16588800152142844
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,fp8,0,0.14940266807874045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,fp8,0,0.1476959983507792
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,float16,0,0.5573493242263794
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,float16,0,0.16664533813794455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,fp8,0,0.14734400312105814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,float16,0,4.639936129252116
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,fp8,0,3.436426798502604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,float16,0,3.710261344909668
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,fp8,0,3.4347521464029946
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,float16,0,3.712277412414551
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,float16,0,0.1690453290939331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,fp8,0,0.48972801367441815
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,fp8,0,3.435680071512858
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,float16,0,2.2517226537068686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,fp8,0,1.8338079452514648
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,float16,0,2.235685348510742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,fp8,0,3.4411681493123374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,float16,0,4.497402509053548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,fp8,0,1.7283786137898762
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,fp8,0,1.751029332478841
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,float16,0,1.9183093706766765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,float16,0,2.2141812642415366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,fp8,0,1.8362560272216797
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,float16,0,1.8705546061197917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,float16,0,0.9455413023630778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,fp8,0,0.9282453060150146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,float16,0,1.0352853139241536
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,float16,0,0.9893333117167155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,float16,0,1.1324640115102131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,fp8,0,0.8783093293507894
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,fp8,0,0.928394635518392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,fp8,0,0.886735995610555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,float16,0,1.1417653560638428
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,fp8,0,0.4493173360824585
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,fp8,0,0.9420426686604818
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,float16,0,0.48727468649546307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,fp8,0,0.45551466941833496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,fp8,0,0.48290133476257324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,fp8,0,1.8327679634094238
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,float16,0,0.4861706495285034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,fp8,0,0.4511839946111043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,float16,0,0.27297067642211914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,fp8,0,0.2505173285802205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,float16,0,0.2702453335126241
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,float16,0,0.2938506603240967
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,fp8,0,0.25271467367808026
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,fp8,0,0.25226134061813354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,fp8,0,0.2547253370285034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,fp8,0,0.2403306762377421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,float16,0,0.5129760106404623
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,float16,0,0.15806399782498678
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,fp8,0,0.4798186620076497
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,fp8,0,0.13900267084439596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,float16,0,0.5708746512730917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,fp8,0,0.1406880021095276
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,float16,0,0.16099199652671814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,float16,0,0.1532693306605021
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,fp8,0,0.13893333077430725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,float16,0,0.15505066514015198
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,fp8,0,0.13486400246620178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,float16,0,0.08772266904513042
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,fp8,0,0.07861333092053731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,float16,0,0.2741173307100932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,fp8,0,0.08229866623878479
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,float16,0,0.0904266635576884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,fp8,0,0.08204799890518188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,fp8,0,0.0788320004940033
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,float16,0,0.25748799244562787
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,float16,0,0.08275199929873149
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,fp8,0,0.08053866525491078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,float16,0,0.5000160137812296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,float16,0,0.14658133188883463
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,float16,0,0.09076266487439473
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,float16,0,2.3076799710591636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,fp8,0,2.140197277069092
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,float16,0,0.09300800164540608
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,float16,0,2.3097492853800454
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,fp8,0,2.14792537689209
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,float16,0,2.7583786646525064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,fp8,0,2.143610636393229
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,fp8,0,0.1394613285859426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,float16,0,2.3157547314961753
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,fp8,0,2.1436427434285483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,float16,0,1.2201493581136067
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,fp8,0,1.0954826672871907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,fp8,0,1.2027733325958252
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,float16,0,1.1640533606211345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,float16,0,1.3970026969909668
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,fp8,0,1.1374346415201824
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,fp8,0,1.1403306325276692
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,float16,0,1.1976479689280193
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,fp8,0,1.145290692647298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,float16,0,0.6921760241190592
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,fp8,0,0.5784586668014526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,float16,0,0.7123733361562093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,fp8,0,0.5850613514582316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,fp8,0,0.5625653266906738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,float16,0,0.6111040115356445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,float16,0,0.681488037109375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,fp8,0,0.5822826623916626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,float16,0,0.35953064759572345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,fp8,0,0.28652799129486084
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,fp8,0,0.28940800825754803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,float16,0,1.3787627220153809
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,fp8,0,0.28541332483291626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,float16,0,0.3081706762313843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,fp8,0,0.30169065793355304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,float16,0,0.30954132477442425
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,fp8,0,0.28548266490300495
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,float16,0,0.16828266779581705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,float16,0,0.6903839906056722
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,fp8,0,0.15264532963434854
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,float16,0,0.1916960080464681
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,fp8,0,0.1639840006828308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,fp8,0,0.16358400384585062
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,float16,0,0.16506133476893106
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,float16,0,0.1670773426691691
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,fp8,0,0.16457600394884744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,fp8,0,0.16193067034085593
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,float16,0,0.1034399966398875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,fp8,0,0.09097066521644592
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,float16,0,0.3566133181254069
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,float16,0,0.10240532954533894
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,float16,0,0.32182933886845905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,fp8,0,0.09077333410580952
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,float16,0,0.1079306701819102
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,fp8,0,0.09289066990216573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,float16,0,0.09451199571291606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,fp8,0,0.0918239951133728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,float16,0,0.05640000104904175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,fp8,0,0.05226666728655497
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,float16,0,0.05589866638183594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,fp8,0,0.055248002211252846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,float16,0,0.055946667989095054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,float16,0,0.05635733405749003
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,fp8,0,0.5495359897613525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,float16,0,0.06414400041103363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,fp8,0,0.05157866577307383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,float16,0,0.1646399994691213
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,fp8,0,0.09070932865142822
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,float16,0,0.10428800185521443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,float16,0,2.814453442891439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,float16,0,2.4327893257141113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,fp8,0,2.2606399854024253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,fp8,0,0.05566933254400889
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,fp8,0,0.05179733534653982
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,float16,0,2.4366453488667807
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,float16,0,2.8029492696126304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,fp8,0,2.358714739481608
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,float16,0,1.2236426671346028
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,fp8,0,2.262725353240967
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,fp8,0,1.154800017674764
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,float16,0,1.4107786814371746
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,float16,0,1.2646506627400715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,fp8,0,1.1865920225779216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,fp8,0,1.187391996383667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,float16,0,1.2263253529866536
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,fp8,0,1.1895039876302083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,float16,0,1.2301066716512044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,float16,0,0.6203893423080444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,fp8,0,1.201253334681193
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,float16,0,0.6222560008366903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,float16,0,0.6911146640777588
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,fp8,0,2.274400075276693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,fp8,0,0.5888160069783529
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,float16,0,0.7090933322906494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,fp8,0,0.6025919914245605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,float16,0,0.6235520044962565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,fp8,0,0.6066453456878662
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,float16,0,0.3327680031458537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,fp8,0,0.2962613304456075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,fp8,0,0.31281065940856934
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,float16,0,0.38718398412068683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,fp8,0,0.29663999875386554
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,float16,0,0.3331200083096822
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,fp8,0,0.31017067035039264
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,float16,0,0.32045332590738934
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,fp8,0,0.6040480136871338
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,fp8,0,0.3147146701812744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,float16,0,0.18528000513712564
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,fp8,0,0.16661333044370016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,float16,0,0.17067732413609824
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,fp8,0,0.16517333189646402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,fp8,0,0.15703466534614563
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,float16,0,0.17497599124908447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,fp8,0,0.1675306757291158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,fp8,0,0.6030293305714926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,fp8,0,0.15754133462905884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,float16,0,0.10345066587130229
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,fp8,0,0.08698667089144389
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,float16,0,0.10186133782068889
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,fp8,0,0.08637866377830505
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,float16,0,0.10428266723950703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,float16,0,0.3558666706085205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,fp8,0,0.09418666362762451
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,float16,0,0.11130666732788086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,fp8,0,0.08573866883913676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,float16,0,0.102783997853597
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,fp8,0,0.09085333347320557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,float16,0,0.05487466851870219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,fp8,0,0.05332799752553304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,float16,0,0.05946666498978933
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,float16,0,0.05843733251094818
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,fp8,0,0.0532533327738444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,fp8,0,0.05082666873931885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,float16,0,0.0584853341182073
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,float16,0,0.064410666624705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,fp8,0,0.05410666763782501
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,float16,0,0.18571199973424277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,fp8,0,0.03443733354409536
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,fp8,0,0.034160000582536064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,float16,0,0.03907199949026108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,float16,0,0.17150932550430298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,fp8,0,0.03465600063403448
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,float16,0,0.03550933301448822
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,fp8,0,0.034832000732421875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,float16,0,0.03770666569471359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,float16,0,0.03770666569471359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,fp8,0,0.0346666673819224
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,fp8,0,0.0499893327554067
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,fp8,0,1.670464038848877
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,float16,0,1.800714651743571
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,fp8,0,1.7176213264465332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,float16,0,1.9730559984842937
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,float16,0,0.037461332976818085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,float16,0,1.8991306622823079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,fp8,0,1.6701119740804036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,float16,0,0.9865067005157471
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,fp8,0,0.8419679800669352
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,float16,0,1.8125012715657551
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,float16,0,1.0088586807250977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,float16,0,0.9342827002207438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,fp8,0,0.8652959664662679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,fp8,0,0.8642666339874268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,fp8,0,1.6705333391825359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,float16,0,0.985210657119751
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,fp8,0,0.8421706358591715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,float16,0,0.49141331513722736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,fp8,0,0.8644959926605225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,fp8,0,0.4405546585718791
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,float16,0,0.5333919922510783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,float16,0,0.49350400765736896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,float16,0,0.9142186641693115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,fp8,0,0.43915732701619464
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,float16,0,0.2383306622505188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,fp8,0,0.22096532583236694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,float16,0,0.5378613471984863
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,fp8,0,0.4265973170598348
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,fp8,0,0.4453333218892415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,float16,0,0.23725332816441855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,float16,0,0.24542399247487387
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,float16,0,0.24060799678166708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,float16,0,0.23915733893712363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,fp8,0,0.43913598855336505
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,fp8,0,0.2207840085029602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,fp8,0,0.2202933430671692
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,fp8,0,0.22195732593536377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,float16,0,0.4981866677602132
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,fp8,0,0.11749866604804993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,float16,0,0.13621333241462708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,float16,0,0.1302720010280609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,fp8,0,0.11733866731325786
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,float16,0,0.13499200344085693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,fp8,0,0.11841066678365071
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,fp8,0,0.12288000186284383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,float16,0,0.12775466839472452
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,fp8,0,0.12289067109425862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,fp8,0,0.0643039991458257
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,float16,0,0.07780266801516215
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,float16,0,0.0758133331934611
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,float16,0,0.07097066442171733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,fp8,0,0.06840533514817555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,fp8,0,0.22744532426198324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,float16,0,0.0719413310289383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,float16,0,0.07375466823577881
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,fp8,0,0.064410666624705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,float16,0,0.042709335684776306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,fp8,0,0.0397173340121905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,float16,0,0.04283200204372406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,fp8,0,0.037578667203585304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,float16,0,0.04242133100827535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,float16,0,0.13512532909711203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,float16,0,0.04364799956480662
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,float16,0,0.041706666350364685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,fp8,0,0.039018665750821434
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,float16,0,0.027301333844661713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,fp8,0,0.024671999116738636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,float16,0,0.027450665831565857
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,fp8,0,0.024400000770886738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,float16,0,0.02735999971628189
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,fp8,0,0.02364266663789749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,fp8,0,0.06623999774456024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,float16,0,0.02605333427588145
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,fp8,0,0.023408000667889912
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,fp8,0,0.039093332986036934
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,float16,0,0.017055999487638474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,float16,0,0.01710933322707812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,fp8,0,0.01685333376129468
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,fp8,0,0.03766400118668874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,float16,0,0.017008000363906223
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,fp8,0,0.016869333883126576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,float16,0,0.016890666137139004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,fp8,0,0.014981333166360855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,fp8,0,0.014826666563749313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,float16,0,0.017082666357358296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,float16,0,0.02741866558790207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,fp8,0,0.02465066562096278
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,fp8,0,0.0661599983771642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,float16,0,0.7243946393330892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,float16,0,0.725050687789917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,fp8,0,0.6811199982961019
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,float16,0,0.7260159651438395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,fp8,0,0.6864266395568848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,float16,0,0.728111982345581
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,fp8,0,0.3468426863352458
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,fp8,0,0.6818613211313883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,fp8,0,0.34725332260131836
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,float16,0,0.37085334459940594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,float16,0,0.3845440149307251
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,float16,0,0.3696266810099284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,fp8,0,0.6810932954152426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,fp8,0,0.3468693494796753
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,float16,0,0.3869333267211914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,float16,0,0.19973333676656088
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,float16,0,0.24579733610153198
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,fp8,0,0.17818133036295572
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,fp8,0,0.18102399508158365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,fp8,0,0.18121600151062012
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,float16,0,0.19693867365519205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,float16,0,0.19929067293802896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,fp8,0,0.17986667156219482
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,float16,0,0.38419731458028156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,float16,0,0.19570134083429971
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,float16,0,0.10096533099810283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,fp8,0,0.09557867050170898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,fp8,0,0.18094400564829508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,float16,0,0.10753066341082256
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,fp8,0,0.09294933080673218
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,float16,0,0.1050933301448822
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,float16,0,0.10567466417948405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,fp8,0,0.09523733456929524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,float16,0,0.10873599847157796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,fp8,0,0.09506133198738098
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,fp8,0,0.09380799531936646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,fp8,0,0.35166935125986737
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,float16,0,0.058362667759259544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,fp8,0,0.05203733344872793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,float16,0,0.058058664202690125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,fp8,0,0.05189866820971171
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,float16,0,0.05987200140953064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,fp8,0,0.05060266455014547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,fp8,0,0.05190399785836538
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,float16,0,0.05827199916044871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,float16,0,0.03335466732581457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,fp8,0,0.031504000226656594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,fp8,0,0.03146133323510488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,float16,0,0.035536001125971474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,float16,0,0.03352533280849457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,float16,0,0.03728000074625015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,fp8,0,0.03142933299144109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,fp8,0,0.03142933299144109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,float16,0,0.033573334415753685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,float16,0,0.02086399992307027
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,fp8,0,0.019013332823912304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,float16,0,0.021013334393501282
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,fp8,0,0.019498666127522785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,float16,0,0.020981334149837494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,fp8,0,0.01926933353145917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,fp8,0,0.34671998023986816
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,fp8,0,0.019071999937295914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,float16,0,0.05600533386071523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,fp8,0,0.013162666310866674
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,float16,0,0.01498666654030482
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,float16,0,0.014602666099866232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,fp8,0,0.03126933425664902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,float16,0,0.014975999792416891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,float16,0,0.012970666090647379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,fp8,0,0.019050666441520054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,float16,0,0.02063999945918719
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,float16,0,0.013056000073750814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,float16,0,0.012794667234023413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,fp8,0,0.051701332132021584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,float16,0,0.012874666601419449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,float16,0,0.014453332871198654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,float16,0,0.4651413361231486
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,fp8,0,0.44362131754557294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,float16,0,0.02120000123977661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,float16,0,0.01292266696691513
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,float16,0,0.46407465140024823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,fp8,0,0.44302932421366376
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,float16,0,0.012714666624863943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,float16,0,0.46479467550913495
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,float16,0,0.23802133401234946
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,fp8,0,0.4413653214772542
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,float16,0,0.2601013382275899
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,fp8,0,0.22895999749501547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,float16,0,0.2407146692276001
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,fp8,0,0.22592000166575113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,fp8,0,0.22604266802469888
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,fp8,0,0.44553065299987793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,float16,0,0.2393653392791748
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,fp8,0,0.22936000426610312
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,fp8,0,0.2286240061124166
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,float16,0,0.12773866454760233
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,fp8,0,0.11930666367212932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,float16,0,0.12666666507720947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,fp8,0,0.1197653313477834
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,float16,0,0.12873599926630655
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,fp8,0,0.11900267004966736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,fp8,0,0.1195093293984731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,float16,0,0.12615467111269632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,float16,0,0.12577600280443826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,float16,0,0.06779733300209045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,fp8,0,0.11925333738327026
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,fp8,0,0.06433600187301636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,float16,0,0.06829333305358887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,float16,0,0.06927466889222463
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,fp8,0,0.06373866895834605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,fp8,0,0.06390400230884552
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,fp8,0,0.06365866462389629
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,float16,0,0.06838933130105336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,float16,0,0.03709333389997482
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,float16,0,0.46348265806833905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,float16,0,0.0373333344856898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,fp8,0,0.03551999976237615
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,fp8,0,0.03512533257404963
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,float16,0,0.23933867613474527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,fp8,0,0.035589332381884255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,fp8,0,0.035232000052928925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,float16,0,0.03737066686153412
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,float16,0,0.04091733445723852
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,fp8,0,0.03549866626660029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,float16,0,0.023056000471115112
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,fp8,0,0.022917332748572033
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,float16,0,0.02327999969323476
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,fp8,0,0.02252800017595291
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,fp8,0,0.022821334501107533
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,fp8,0,0.06271466612815857
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,float16,0,0.02329600105683009
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,float16,0,0.022954667607943218
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,fp8,0,0.021216000119845074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,fp8,0,0.014864000181357065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,float16,0,0.015226667126019796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,float16,0,0.06820266445477803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,float16,0,0.015018666783968607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,fp8,0,0.015168000012636185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,fp8,0,0.014682666709025701
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,float16,0,0.015098666151364645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,float16,0,0.01505600040157636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,float16,0,0.0373333344856898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,float16,0,0.011055999745925268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,float16,0,0.010591999938090643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,fp8,0,0.010725333044926325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,fp8,0,0.010666667173306147
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,float16,0,0.011120000233252844
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,float16,0,0.011066666493813196
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,fp8,0,0.010608000059922537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,float16,0,0.01091733326514562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,fp8,0,0.02214933435122172
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,float16,0,0.010970667004585266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,float16,0,0.010821333775917688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,fp8,0,0.010506667196750641
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,fp8,0,0.010608000059922537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,float16,0,0.011018666128317514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,float16,0,0.014762666076421738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,float16,0,0.011039999624093374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,float16,0,0.02342933416366577
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,float16,0,0.010901333143313726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,fp8,0,0.37373868624369305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,float16,0,0.391482671101888
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,float16,0,0.3907359838485718
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,fp8,0,0.3739733298619588
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,fp8,0,0.37513065338134766
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,float16,0,0.20440532763799033
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,fp8,0,0.19197332859039307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,float16,0,0.20284799734751383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,float16,0,0.3914719820022583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,float16,0,0.20206934213638306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,fp8,0,0.1927893360455831
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,fp8,0,0.19146132469177246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,fp8,0,0.3732000192006429
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,fp8,0,0.19155732790629068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,float16,0,0.20136533180872598
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,float16,0,0.3931573232014974
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,float16,0,0.10745066404342651
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,fp8,0,0.10083199540774028
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,float16,0,0.10921600461006165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,float16,0,0.20382400353749594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,fp8,0,0.10029333829879761
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,float16,0,0.10703999797503154
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,float16,0,0.10732799768447876
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,float16,0,0.108815997838974
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,fp8,0,0.193615992863973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,fp8,0,0.1011199951171875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,fp8,0,0.10214400291442871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,float16,0,0.058037335673967995
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,fp8,0,0.05394133428732554
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,float16,0,0.05815466741720835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,fp8,0,0.05402133365472158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,fp8,0,0.054085334142049156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,fp8,0,0.05407466491063436
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,float16,0,0.06016000111897787
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,float16,0,0.05858666698137919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,float16,0,0.05823466678460439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,float16,0,0.03334933271010717
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,fp8,0,0.03138133386770884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,float16,0,0.03334933271010717
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,fp8,0,0.053770666321118675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,fp8,0,0.03084266682465871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,float16,0,0.03349866718053818
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,fp8,0,0.0296426663796107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,float16,0,0.03147733211517334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,fp8,0,0.02942399928967158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,float16,0,0.021253332495689392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,float16,0,0.03283733377854029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,fp8,0,0.01931200052301089
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,fp8,0,0.019248000035683315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,fp8,0,0.030026666820049286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,float16,0,0.021114667256673176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,fp8,0,0.019215999792019527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,fp8,0,0.10001066327095032
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,float16,0,0.02109866589307785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,float16,0,0.021269333859284718
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,float16,0,0.014661333213249842
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,float16,0,0.013749333719412485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,float16,0,0.01452800010641416
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,float16,0,0.013317332913478216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,fp8,0,0.009946666657924652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,float16,0,0.02107200026512146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,float16,0,0.010933333386977514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,float16,0,0.010608000059922537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,fp8,0,0.019178666174411774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,float16,0,0.01097600037852923
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,float16,0,0.010773333410422007
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,fp8,0,0.010597333312034607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,float16,0,0.009050666665037474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,fp8,0,0.013178666432698568
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,float16,0,0.009114666531483332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,float16,0,0.01392000044385592
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,float16,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,float16,0,0.010768000036478043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,float16,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,fp8,0,0.009904000287254652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,float16,0,0.35148799419403076
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,fp8,0,0.3365600109100342
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,float16,0,0.35154132048288983
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,float16,0,0.3511306842168172
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,fp8,0,0.01051733394463857
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,fp8,0,0.3375306526819865
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,float16,0,0.1821920077006022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,fp8,0,0.3363093137741089
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,float16,0,0.34993600845336914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,float16,0,0.18184000253677368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,float16,0,0.1816906730333964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,fp8,0,0.17277334133783975
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,fp8,0,0.1747786601384481
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,float16,0,0.1824000080426534
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,fp8,0,0.17271467049916586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,float16,0,0.18492267529169717
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,float16,0,0.09616000453631084
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,fp8,0,0.3396373192469279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,fp8,0,0.1732106606165568
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,fp8,0,0.09071466326713562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,fp8,0,0.1749066710472107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,fp8,0,0.0925439993540446
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,float16,0,0.09497066338857015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,float16,0,0.09738133351008098
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,float16,0,0.09728533029556274
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,fp8,0,0.09115733702977498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,fp8,0,0.09070932865142822
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,float16,0,0.09724266330401103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,fp8,0,0.09151466687520345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,float16,0,0.053823997577031456
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,float16,0,0.05431999762852987
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,float16,0,0.05446400245030721
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,fp8,0,0.04770666857560476
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,fp8,0,0.04961066444714864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,float16,0,0.05259733398755392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,fp8,0,0.04832000037034353
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,float16,0,0.03152533372243246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,fp8,0,0.027295999228954315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,float16,0,0.03128000100453695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,fp8,0,0.027424000203609467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,float16,0,0.03128000100453695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,float16,0,0.03148266673088074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,fp8,0,0.0271573339899381
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,float16,0,0.03105599929889043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,fp8,0,0.028773332635561626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,float16,0,0.019199999670187633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,fp8,0,0.01918399954835574
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,float16,0,0.019258666783571243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,fp8,0,0.019061333189407986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,float16,0,0.01926933353145917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,fp8,0,0.049839998284975685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,fp8,0,0.0191040001809597
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,float16,0,0.01911466692884763
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,float16,0,0.05649599929650625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,float16,0,0.020058666666348774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,fp8,0,0.050016000866889954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,fp8,0,0.019146667172511418
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,float16,0,0.012981332838535309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,float16,0,0.012858666479587555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,float16,0,0.012997332960367203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,float16,0,0.012917333592971167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,fp8,0,0.027552001178264618
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,float16,0,0.010634666929642359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,float16,0,0.010741333166758219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,float16,0,0.010602666685978571
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,float16,0,0.010757333288590113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,fp8,0,0.008853333070874214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,float16,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,fp8,0,0.018911999960740406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,fp8,0,0.008821333448092142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,float16,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,float16,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,float16,0,0.009050666665037474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,float16,0,0.013258667041858038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,float16,0,0.34647464752197266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,fp8,0,0.32418133815129596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,float16,0,0.3471039930979411
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,float16,0,0.011034666250149408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,fp8,0,0.3248479962348938
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,float16,0,0.3469173510869344
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,fp8,0,0.32435200611750287
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,float16,0,0.344810684521993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,fp8,0,0.32368000348409015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,fp8,0,0.16754132509231567
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,fp8,0,0.16684265931447348
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,float16,0,0.17921066284179688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,fp8,0,0.16760534048080444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,float16,0,0.1813760002454122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,fp8,0,0.17067732413609824
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,fp8,0,0.16661866505940756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,float16,0,0.17779733737309775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,float16,0,0.09773866335550944
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,fp8,0,0.08937600255012512
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,float16,0,0.09659199913342793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,float16,0,0.09691199660301208
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,fp8,0,0.08886933326721191
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,fp8,0,0.0869706670443217
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,float16,0,0.09427733222643535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,fp8,0,0.08718400200208028
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,float16,0,0.17956799268722534
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,float16,0,0.052986666560173035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,fp8,0,0.047557334105173744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,float16,0,0.05202666421731313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,float16,0,0.05440000196297964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,float16,0,0.18159466981887817
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,fp8,0,0.04748799900213877
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,float16,0,0.0528106689453125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,fp8,0,0.04825599988301595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,float16,0,0.053082664807637535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,fp8,0,0.04866666595141093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,float16,0,0.030752000709374745
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,fp8,0,0.027237333357334137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,float16,0,0.02939733366171519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,fp8,0,0.027349332968393963
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,float16,0,0.029306667546431225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,fp8,0,0.027263998985290527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,fp8,0,0.08878933389981587
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,float16,0,0.029498666524887085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,float16,0,0.09646933277448018
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,fp8,0,0.027210667729377747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,float16,0,0.01915733392039935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,fp8,0,0.019066666563351948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,float16,0,0.01889066646496455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,fp8,0,0.018618666877349217
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,fp8,0,0.01852799952030182
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,float16,0,0.018863999595244724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,fp8,0,0.019066666563351948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,float16,0,0.01893866683046023
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,fp8,0,0.018863999595244724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,fp8,0,0.04784533381462097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,float16,0,0.019082666685183842
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,float16,0,0.012928000340859095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,fp8,0,0.012682666381200155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,float16,0,0.013023999830087027
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,float16,0,0.013130666067202887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,fp8,0,0.013855999956528345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,fp8,0,0.027637332677841187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,fp8,0,0.012655999511480331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,float16,0,0.0129120002190272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,float16,0,0.010725333044926325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,float16,0,0.010922666639089584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,fp8,0,0.00877333308259646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,fp8,0,0.00921066664159298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,float16,0,0.010602666685978571
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,float16,0,0.010703999549150467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,float16,0,0.010698666175206503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,float16,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,float16,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,fp8,0,0.008799999952316284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,float16,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,float16,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,float16,0,0.013221333424250284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,float16,0,0.03044266750415166
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,fp8,0,0.009061333412925402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,1,128,1,float16,float16,0,0.012778667112191519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,1,128,1,float16,fp8,0,0.010885333021481832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,2,128,1,float16,float16,0,0.019167999426523846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,0,0.021738665799299877
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,float16,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,2,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,0,0.016879999389251072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,0,0.02310933421055476
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,0,0.02733866622050603
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,1,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,1,128,1,float16,float16,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,fp8,0,0.009082666908701261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,float16,0,0.09691199660301208
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,2,128,1,float16,fp8,0,0.010885333021481832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,0,0.01711999997496605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,1,128,1,float16,float16,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,0,0.01498666654030482
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,1,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,2,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,2,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,float16,0,0.05268266797065735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,0,0.010911999891201654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,fp8,0,0.03958933303753535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,0,0.009232000137368837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,0,0.01119999960064888
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,2,128,1,float16,float16,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,2,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,2,128,1,float16,float16,0,0.012602667013804117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,0,0.007114666824539502
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,float16,0,0.027493332823117573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,0,0.008682666967312494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,0,0.014949332922697067
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,1,128,1,float16,float16,0,0.008597333605090777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,fp8,0,0.07253333429495494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,2,128,1,float16,float16,0,0.00860799973209699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,1,128,1,float16,fp8,0,0.008682666967312494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,2,128,1,float16,fp8,0,0.008687999720374743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,0,0.0069759997228781385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,0,0.008538666491707167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,0,0.0069919998447100324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,1,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,1,128,1,float16,fp8,0,0.008650666723648706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,1,128,1,float16,fp8,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,2,128,1,float16,float16,0,0.008613333106040955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,2,128,1,float16,fp8,0,0.008799999952316284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,0,0.0069333333522081375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,0,0.006693333387374878
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,0,0.0068800002336502075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,fp8,0,0.023269332945346832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,1,128,1,float16,float16,0,0.008799999952316284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,2,128,1,float16,float16,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,2,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,0,0.007029333462317784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,0,0.006821333120266597
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,0,0.006730666384100914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,1,128,1,float16,float16,0,0.008832000195980072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,fp8,0,0.0069973332186539965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,1,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,float16,0,0.012709333250919977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,2,128,1,float16,float16,0,0.00850133349498113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,2,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,0,0.00690133310854435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,0,0.006581333155433337
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,float16,0,0.008287999778985977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,0,0.007125333572427432
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,1,128,1,float16,float16,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,1,128,1,float16,fp8,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,2,128,1,float16,float16,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,2,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,0,0.0069973332186539965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,0,0.007093333328763644
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,fp8,0,0.006895999734600385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,0,0.00706666645904382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,float16,0,0.008709333216150602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,float16,0,0.017071999609470367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,0,0.006575999781489372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,0,0.006895999734600385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,float16,0,0.0068853336075941724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,0,0.0069226666043202085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,fp8,0,15.746346791585287
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,float16,0,16.369130452473957
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,float16,0,16.373477935791016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,fp8,0,15.63210678100586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,float16,0,16.370992024739582
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,fp8,0,15.11459732055664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,fp8,0,15.131355285644531
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,float16,0,16.87928517659505
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,fp8,0,8.097007751464844
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,fp8,0,7.627002716064453
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,float16,0,8.26918919881185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,float16,0,11.92584482828776
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,fp8,0,8.089594523111979
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,float16,0,11.64687983194987
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,float16,0,4.680063883463542
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,fp8,0,8.111557642618815
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,float16,0,4.193269411722819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,float16,0,8.258143742879232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,fp8,0,4.549167950948079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,fp8,0,5.021701176961263
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,float16,0,11.545115152994791
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,fp8,0,4.154634793599446
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,fp8,0,4.114218711853027
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,float16,0,4.722517331441243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,fp8,0,3.8822078704833984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,float16,0,2.2784266471862793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,float16,0,5.602495829264323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,float16,0,2.15883731842041
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,fp8,0,7.617141087849935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,fp8,0,2.2758399645487466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,fp8,0,1.9939360618591309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,float16,0,2.1605067253112793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,fp8,0,2.1706666946411133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,fp8,0,1.9928107261657715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,float16,0,2.6909974416097007
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,float16,0,2.1623199780782065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,fp8,0,2.175957361857096
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,float16,0,4.194597244262695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,fp8,0,9.75772794087728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,fp8,0,8.725269317626953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,float16,0,9.437274932861328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,fp8,0,8.720917383829752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,float16,0,13.53872553507487
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,fp8,0,9.299381256103516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,float16,0,11.646906534830729
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,float16,0,13.384459177652994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,fp8,0,4.701029459635417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,fp8,0,4.574682553609212
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,float16,0,4.855770746866862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,float16,0,6.261477152506511
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,fp8,0,4.85374387105306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,float16,0,6.503482818603516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,fp8,0,4.405925432840983
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,float16,0,6.339743932088216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,fp8,0,4.406698544820149
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,float16,0,2.821711858113607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,float16,0,3.303039868672689
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,float16,0,6.268357594807942
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,fp8,0,2.413562615712484
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,fp8,0,2.3983093897501626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,float16,0,3.272416114807129
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,fp8,0,2.4006826082865396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,float16,0,2.620474656422933
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,fp8,0,3.0818986892700195
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,fp8,0,2.385514736175537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,float16,0,1.5193333625793457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,float16,0,3.4516000747680664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,float16,0,1.2714186509450276
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,fp8,0,1.2463520367940266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,float16,0,1.2675786813100178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,fp8,0,1.5167733828226726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,float16,0,1.267957369486491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,fp8,0,1.1694080034891765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,fp8,0,6.176885604858398
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,float16,0,1.3230133056640625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,fp8,0,6.193583806355794
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,float16,0,7.029061635335286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,float16,0,9.0043093363444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,float16,0,6.674069086710612
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,float16,0,4.229088147481282
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,float16,0,3.6376161575317383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,fp8,0,3.2852001190185547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,fp8,0,6.817893346150716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,fp8,0,6.178927739461263
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,fp8,0,1.2685226599375408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,fp8,0,1.5884532928466797
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,float16,0,9.425925572713217
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,fp8,0,3.1419785817464194
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,float16,0,3.534485181172689
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,fp8,0,3.3407039642333984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,float16,0,4.127519925435384
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,float16,0,3.3842665354410806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,fp8,0,3.343071937561035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,float16,0,2.1678239504496255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,fp8,0,1.7091093063354492
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,float16,0,2.4908906618754068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,fp8,0,1.7142079671223958
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,fp8,0,1.6064106623331706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,float16,0,2.112325350443522
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,float16,0,1.8517173131306965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,fp8,0,0.9418346881866455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,float16,0,1.2030933698018391
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,float16,0,1.734773317972819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,fp8,0,1.786080042521159
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,fp8,0,0.898080031077067
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,float16,0,0.9165706634521484
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,float16,0,0.968618631362915
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,fp8,0,3.575584093729655
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,fp8,0,0.848848025004069
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,float16,0,0.971407969792684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,fp8,0,0.898085355758667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,fp8,0,2.0155253410339355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,float16,0,0.9894560178120931
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,float16,0,8.708415985107422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,fp8,0,8.091477076212565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,fp8,0,8.636645634969076
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,float16,0,8.71786117553711
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,float16,0,10.648325602213541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,fp8,0,8.56612777709961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,fp8,0,0.9704106648763021
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,float16,0,4.392730712890625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,fp8,0,4.0843305587768555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,float16,0,4.414085388183594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,fp8,0,8.6767946879069
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,float16,0,12.325509389241537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,fp8,0,4.35371748606364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,float16,0,5.184874534606934
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,fp8,0,4.07205867767334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,float16,0,5.572330474853516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,float16,0,5.976314544677734
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,float16,0,2.7840426762898765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,fp8,0,2.0740960439046225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,fp8,0,4.366298675537109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,fp8,0,2.211322625478109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,float16,0,2.729615847269694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,float16,0,3.099301338195801
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,fp8,0,2.6114560763041177
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,fp8,0,2.2146453857421875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,float16,0,2.7516374588012695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,float16,0,1.2766133149464924
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,float16,0,1.431349277496338
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,fp8,0,1.1006080309549968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,fp8,0,1.1409760316212971
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,float16,0,1.4238559405008953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,fp8,0,1.0794400374094646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,fp8,0,1.1464426517486572
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,float16,0,1.39464537302653
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,float16,0,1.151642640431722
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,fp8,0,1.07314133644104
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,fp8,0,4.5108747482299805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,float16,0,0.7181173165639242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,fp8,0,0.6064159870147705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,fp8,0,0.613045334815979
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,float16,0,0.7301973501841227
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,fp8,0,0.6038133303324381
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,float16,0,0.6269226471583048
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,float16,0,0.7304906845092773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,fp8,0,0.6081600189208984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,float16,0,3.1150080362955728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,fp8,0,5.0060427983601885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,float16,0,6.00059191385905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,fp8,0,5.0913651784261065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,float16,0,6.331600189208984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,float16,0,0.65011199315389
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,fp8,0,0.6034026543299357
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,float16,0,5.110656102498372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,fp8,0,5.0893707275390625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,float16,0,2.589632034301758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,fp8,0,4.77785587310791
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,float16,0,2.747690518697103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,fp8,0,2.5741492907206216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,fp8,0,2.4013546307881675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,float16,0,2.791872024536133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,fp8,0,2.5659839312235513
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,float16,0,2.857189178466797
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,fp8,0,2.0680480003356934
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,float16,0,2.8767573038736978
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,fp8,0,2.5723253885904946
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,float16,0,1.6305279731750488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,fp8,0,1.2311093012491863
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,fp8,0,1.2257226308186848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,float16,0,1.3669546445210774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,float16,0,1.6230932871500652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,fp8,0,1.3107679684956868
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,float16,0,1.6280852953592937
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,fp8,0,1.2259893417358398
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,float16,0,6.483989079793294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,fp8,0,1.3176000118255615
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,float16,0,0.8378880023956299
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,float16,0,0.7456106344858805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,float16,0,0.8286826610565186
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,fp8,0,0.641813317934672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,float16,0,0.8714719613393148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,fp8,0,0.6918506622314453
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,fp8,0,2.5962346394856772
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,fp8,0,0.6846880118052164
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,float16,0,0.6897173722585043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,float16,0,0.4287946621576945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,float16,0,0.37773334980010986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,float16,0,0.4053066571553548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,fp8,0,0.35070399443308514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,float16,0,0.37483731905619305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,fp8,0,0.3519573211669922
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,float16,0,0.3752853473027547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,fp8,0,0.36905066172281903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,float16,0,1.3213120301564534
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,fp8,0,0.6382986704508463
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,fp8,0,0.6860319773356119
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,float16,0,4.877338727315267
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,fp8,0,0.3680959939956665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,fp8,0,4.557722727457683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,fp8,0,0.3696800072987874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,float16,0,5.827311833699544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,float16,0,4.880384127298991
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,fp8,0,4.871653238932292
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,float16,0,3.0437545776367188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,fp8,0,2.297349294026693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,fp8,0,2.311946709950765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,float16,0,2.4970614115397134
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,fp8,0,4.561845461527507
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,float16,0,3.0304107666015625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,fp8,0,4.557690620422363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,float16,0,2.4611199696858725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,fp8,0,3.130799929300944
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,fp8,0,2.2989652951558432
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,fp8,0,2.454458713531494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,float16,0,3.0034828186035156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,float16,0,1.3539306322733562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,fp8,0,1.2775146961212158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,float16,0,1.3319413661956787
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,fp8,0,1.1676212946573894
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,float16,0,1.5162026087443035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,fp8,0,1.280138651529948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,float16,0,1.28166397412618
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,float16,0,4.8882293701171875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,float16,0,0.6753439903259277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,fp8,0,0.7205920219421387
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,fp8,0,0.6454879840215048
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,float16,0,0.7840800285339355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,fp8,0,0.6434613466262817
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,float16,0,0.7704106966654459
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,fp8,0,0.6428426504135132
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,float16,0,1.4993759791056316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,fp8,0,1.1744799613952637
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,float16,0,0.3790666659673055
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,float16,0,0.3919839859008789
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,fp8,0,0.32255999247233075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,fp8,0,0.3210986653963725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,float16,0,0.3938719828923543
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,fp8,0,1.1686506271362305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,fp8,0,0.34249067306518555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,fp8,0,0.3430826663970947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,float16,0,0.34564268589019775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,float16,0,0.36718400319417316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,float16,0,0.7766559918721517
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,fp8,0,0.343120018641154
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,float16,0,0.21512534221013388
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,float16,0,0.21406932671864828
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,fp8,0,0.19394133488337198
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,fp8,0,0.18126932779947916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,float16,0,0.7664480209350586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,float16,0,0.21502933899561563
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,fp8,0,0.19229867060979208
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,fp8,0,0.1957813302675883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,float16,0,0.19932266076405844
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,fp8,0,2.766245206197103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,fp8,0,2.7667201360066733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,float16,0,3.6177333196004233
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,float16,0,3.5393492380777993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,fp8,0,0.19234132766723633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,fp8,0,0.6442133188247681
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,float16,0,0.210533340771993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,fp8,0,2.7687466939290366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,float16,0,2.9490025838216147
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,float16,0,3.2574132283528647
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,fp8,0,3.10861873626709
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,float16,0,1.7717706362406414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,fp8,0,1.4876532554626465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,fp8,0,1.4098025957743328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,float16,0,1.5173759460449219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,fp8,0,1.4871679941813152
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,float16,0,1.635749340057373
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,fp8,0,1.4975093205769856
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,float16,0,0.7609600226084391
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,fp8,0,1.486570676167806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,float16,0,1.5579412778218586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,fp8,0,0.7674240271250407
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,float16,0,0.7629706859588623
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,float16,0,0.7919306755065918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,fp8,0,0.7937760353088379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,fp8,0,0.7159199714660645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,float16,0,0.7628586292266846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,float16,0,0.39792533715566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,fp8,0,0.3963306744893392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,float16,0,0.4368906815846761
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,fp8,0,0.3814613421758016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,fp8,0,0.3958880106608073
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,float16,0,0.47942399978637695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,fp8,0,0.3743893305460612
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,float16,0,1.5825279553731282
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,float16,0,0.3982400099436442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,fp8,0,0.39587732156117755
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,float16,0,0.2412373423576355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,fp8,0,0.2151413361231486
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,float16,0,0.2417866587638855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,float16,0,0.2399946649869283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,fp8,0,0.2116853396097819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,float16,0,0.2411359945933024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,float16,0,0.8086773554484049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,fp8,0,0.7195786635080973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,fp8,0,0.21820799509684244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,float16,0,0.22248532374699911
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,fp8,0,0.21524266401926676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,float16,0,0.13853866855303446
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,fp8,0,0.12454400459925334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,float16,0,0.1362399955590566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,fp8,0,0.12557333707809448
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,fp8,0,0.12497599919637044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,fp8,0,0.11715199549992879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,float16,0,0.39800532658894855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,float16,0,0.12517333030700684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,fp8,0,0.12372799714406331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,float16,0,2.955439885457357
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,fp8,0,2.9558080037434897
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,fp8,0,0.7138826847076416
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,fp8,0,2.7962719599405923
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,float16,0,3.433151880900065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,float16,0,0.13587733109792074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,float16,0,2.959178606669108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,fp8,0,2.954165458679199
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,float16,0,2.9656639099121094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,fp8,0,2.8000265757242837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,float16,0,1.7652799288431804
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,fp8,0,1.4277280171712239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,fp8,0,1.488357384999593
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,fp8,0,0.2168160080909729
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,fp8,0,1.488869349161784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,float16,0,1.7735786437988281
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,float16,0,1.7548799514770508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,fp8,0,1.4111305872599285
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,float16,0,1.4960959752400715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,fp8,0,1.489301363627116
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,float16,0,0.1430186629295349
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,fp8,0,0.7567946910858154
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,float16,0,0.8834133148193359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,float16,0,0.7610399723052979
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,fp8,0,0.7563253243764242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,fp8,0,0.7332800229390463
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,fp8,0,0.7569440205891927
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,float16,0,0.7984159787495931
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,fp8,0,0.7175412972768148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,float16,0,1.560922622680664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,fp8,0,0.3920533259709676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,float16,0,0.4651679992675781
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,float16,0,0.4411413272221883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,fp8,0,0.39163732528686523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,float16,0,0.4503893454869588
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,fp8,0,0.3916906515757243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,float16,0,0.4091626803080241
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,fp8,0,0.39264531930287677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,float16,0,0.22350933154424033
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,fp8,0,0.21002666155497232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,float16,0,0.24265599250793457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,fp8,0,0.20830933252970377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,float16,0,0.7746613025665283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,float16,0,0.231440007686615
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,fp8,0,0.2109866738319397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,float16,0,0.24002132813135782
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,fp8,0,0.2126506765683492
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,fp8,0,0.19808000326156616
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,fp8,0,0.3930506706237793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,float16,0,0.1269706686337789
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,float16,0,0.13310933113098145
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,float16,0,0.43296531836191815
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,fp8,0,0.11774399876594543
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,fp8,0,0.11915733416875203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,float16,0,0.12685867150624594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,fp8,0,0.1193386713663737
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,fp8,0,0.11063466469446818
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,float16,0,0.11962133646011353
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,float16,0,0.9329439798990885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,float16,0,0.07110400001207988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,fp8,0,0.07188266515731812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,float16,0,0.07905599971612294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,fp8,0,0.06628266473611195
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,float16,0,0.07187200089295705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,fp8,0,0.07111466427644093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,fp8,0,0.0669706662495931
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,float16,0,0.07865066826343536
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,float16,0,0.07242133220036824
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,fp8,0,0.06634133557478587
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,float16,0,0.21214399735132852
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,float16,0,2.098522663116455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,float16,0,1.8647680282592773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,float16,0,0.12999999523162842
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,fp8,0,1.7731733322143555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,float16,0,1.9545547167460124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,fp8,0,1.8641546567281086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,fp8,0,0.11688533425331116
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,float16,0,1.0744907061258953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,float16,0,1.9676693280537922
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,fp8,0,0.9415520032246908
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,float16,0,1.052293300628662
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,float16,0,0.9645439783732096
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,fp8,0,0.9065706729888916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,float16,0,1.0946880181630452
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,fp8,0,0.9396266937255859
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,fp8,0,0.9414026737213135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,float16,0,0.4819093147913615
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,float16,0,0.9445333480834961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,fp8,0,0.8969866434733073
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,fp8,0,1.7710827191670735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,fp8,0,0.5127946535746256
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,float16,0,0.4820213317871094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,float16,0,0.529423991839091
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,fp8,0,0.4805599848429362
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,float16,0,0.48257601261138916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,float16,0,0.5128586689631144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,fp8,0,0.4585813283920288
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,fp8,0,0.4814879894256592
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,float16,0,0.2959573268890381
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,fp8,0,0.25438400109608966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,float16,0,0.26819199323654175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,float16,0,0.28778133789698285
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,fp8,0,0.2569813330968221
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,fp8,0,0.2422879934310913
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,float16,0,0.25284266471862793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,float16,0,0.2537066737810771
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,float16,0,0.14865600069363913
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,fp8,0,0.253983994325002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,fp8,0,0.13723733027776083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,float16,0,0.14566933115323386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,float16,0,0.15225600202878317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,fp8,0,0.13727999726931253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,float16,0,0.156058669090271
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,fp8,0,0.481167991956075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,float16,0,0.15149866541226706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,fp8,0,0.1393173336982727
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,float16,0,0.08746666709582011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,fp8,0,0.07844266792138417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,float16,0,0.08653333783149719
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,fp8,0,0.07863466441631317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,float16,0,0.08964799841245015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,fp8,0,0.0765226682027181
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,fp8,0,0.07833066582679749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,float16,0,0.08946667114893596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,fp8,0,0.0783786674340566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,fp8,0,0.23968533674875894
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,fp8,0,1.8610240618387859
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,float16,0,0.049882665276527405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,float16,0,0.05026666820049286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,fp8,0,0.04711999992529551
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,float16,0,0.05183466772238413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,fp8,0,0.1365066667397817
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,float16,0,0.05353599786758423
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,fp8,0,0.04751466711362203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,fp8,0,0.13661332925160727
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,float16,0,0.05366933345794678
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,fp8,0,0.04965866605440775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,float16,0,0.08676266670227051
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,float16,0,1.9925653139750164
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,fp8,0,1.9143679936726887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,float16,0,2.05403200785319
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,fp8,0,0.05020266771316528
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,fp8,0,0.04722133278846741
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,float16,0,2.0241173108418784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,fp8,0,1.9147413571675618
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,float16,0,2.004469394683838
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,fp8,0,1.917941411336263
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,float16,0,1.1059892972310383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,float16,0,1.0303200085957844
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,fp8,0,1.0026559829711914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,fp8,0,0.9755626519521078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,fp8,0,1.990928014119466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,float16,0,1.011034647623698
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,float16,0,1.005354642868042
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,fp8,0,1.002229372660319
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,fp8,0,1.0032479763031006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,float16,0,0.5122720003128052
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,fp8,0,0.5102506478627523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,float16,0,0.5733173290888468
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,fp8,0,0.510757327079773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,float16,0,0.5529226859410604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,float16,0,0.5135680039723715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,fp8,0,0.4959893226623535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,float16,0,0.2897546688715617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,fp8,0,0.25779199600219727
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,float16,0,0.3083359996477763
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,float16,0,1.1029547055562336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,float16,0,0.2800533374150594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,fp8,0,0.26813334226608276
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,float16,0,0.2864533265431722
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,fp8,0,0.2649066646893819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,fp8,0,1.003343979517619
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,fp8,0,0.2666880091031392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,float16,0,0.26693334182103473
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,float16,0,0.535045345624288
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,fp8,0,0.5103040138880411
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,float16,0,0.15546133120854697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,fp8,0,0.14425599575042725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,fp8,0,0.1383626659711202
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,float16,0,0.14644799629847208
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,float16,0,0.15321066975593567
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,fp8,0,0.14206399520238241
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,fp8,0,0.14215999841690063
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,fp8,0,0.2649493416150411
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,fp8,0,0.08055999875068665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,fp8,0,0.07799466451009114
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,float16,0,0.08572266499201457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,float16,0,0.0864586631457011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,fp8,0,0.07649066547552745
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,fp8,0,0.07860800127188365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,float16,0,0.08097066481908162
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,fp8,0,0.07982933521270752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,fp8,0,0.4916906754175822
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,float16,0,0.15492799878120422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,float16,0,0.054511999090512596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,fp8,0,0.04619733492533366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,fp8,0,0.14283733566602072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,float16,0,0.14355199535687765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,float16,0,0.0524533341328303
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,fp8,0,0.048570667703946434
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,fp8,0,0.044549331068992615
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,float16,0,0.05101333558559418
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,fp8,0,0.048298666874567665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,float16,0,0.08841066559155782
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,float16,0,0.035616000493367515
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,fp8,0,0.033226666351159416
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,float16,0,0.03569599986076355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,fp8,0,0.03330666571855545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,float16,0,0.08278400202592213
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,fp8,0,0.03332799921433131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,float16,0,0.03570666660865148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,float16,0,0.03346666693687439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,fp8,0,0.03347733368476232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,float16,0,0.052015999952952065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,fp8,0,0.04910933474699656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,float16,0,1.5064105987548828
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,fp8,0,1.5040213267008464
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,float16,0,0.048810665806134544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,float16,0,1.5073760350545247
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,fp8,0,1.4882133801778157
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,float16,0,0.03356799980004629
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,float16,0,0.7606666882832845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,float16,0,0.7769599755605062
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,fp8,0,0.7438027064005533
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,float16,0,1.5098133087158203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,fp8,0,1.4902559916178386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,fp8,0,0.7416000366210938
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,float16,0,0.7639413674672445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,fp8,0,0.7404800256093343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,float16,0,1.5119199752807617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,fp8,0,1.4661280314127605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,float16,0,0.7616426944732666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,fp8,0,0.7396799723307291
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,float16,0,0.76473601659139
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,fp8,0,0.7397387027740479
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,float16,0,0.39614399274190265
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,float16,0,0.38796265920003253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,fp8,0,0.3858400185902913
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,fp8,0,0.38809601465861004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,float16,0,0.3908960024515788
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,float16,0,0.38811198870340985
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,fp8,0,0.3877280155817668
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,fp8,0,0.3858773310979207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,float16,0,0.20243734121322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,float16,0,0.21215999126434326
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,fp8,0,0.20175999402999878
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,float16,0,0.20654400189717612
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,fp8,0,0.19738133748372397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,float16,0,0.20152000586191812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,float16,0,0.4002773364384969
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,fp8,0,0.2006666660308838
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,fp8,0,0.19528534015019736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,fp8,0,0.3784319957097371
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,float16,0,0.2074399987856547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,float16,0,0.1164533297220866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,float16,0,0.11358933647473653
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,fp8,0,0.11001066366831462
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,fp8,0,0.10825066765149434
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,float16,0,0.12418133020401001
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,fp8,0,0.10584533214569092
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,float16,0,0.11578133702278137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,float16,0,0.10951466361681621
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,fp8,0,0.10900266965230306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,float16,0,0.06700799862543742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,float16,0,0.06217599908510844
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,fp8,0,0.05937066674232483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,float16,0,0.06613333523273468
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,float16,0,0.06424533327420552
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,fp8,0,0.058229332168896995
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,fp8,0,0.0603413333495458
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,float16,0,0.06318399806817372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,float16,0,0.039664000272750854
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,fp8,0,0.035274667044480644
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,float16,0,0.03959999978542328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,float16,0,0.03955733279387156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,fp8,0,0.035205334424972534
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,fp8,0,0.03530666728814443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,float16,0,0.0394400010506312
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,fp8,0,0.20070934295654297
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,fp8,0,0.035386666655540466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,float16,0,0.03737066686153412
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,fp8,0,0.025045332809289295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,float16,0,0.025392000873883564
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,float16,0,0.025466665625572205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,fp8,0,0.025040000677108765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,float16,0,0.025274666647116344
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,fp8,0,0.025429333249727886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,fp8,0,0.06155199805895487
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,fp8,0,0.025072000920772552
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,fp8,0,0.025231999655564625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,fp8,0,0.06009600063165029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,float16,0,0.017968000223239262
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,fp8,0,0.017194667210181553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,float16,0,0.018458666900793713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,float16,0,0.018458666900793713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,fp8,0,0.017269333203633625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,float16,0,0.01762666677435239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,fp8,0,0.10525332887967427
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,fp8,0,0.017237332959969837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,fp8,0,0.03587199995915095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,float16,0,0.025253333151340485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,float16,0,0.6314506530761719
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,float16,0,0.025274666647116344
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,fp8,0,0.6191413402557373
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,fp8,0,0.6188799937566122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,float16,0,0.01915733392039935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,fp8,0,0.016842667013406754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,float16,0,0.6230026483535767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,fp8,0,0.6234773397445679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,float16,0,0.317466676235199
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,float16,0,0.6246879895528158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,fp8,0,0.6218239863713583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,float16,0,0.3227360049883525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,fp8,0,0.31416000922520954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,float16,0,0.3182506759961446
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,fp8,0,0.3141226569811503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,float16,0,0.31782400608062744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,float16,0,0.32755200068155926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,fp8,0,0.31412800153096515
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,float16,0,0.6231199900309244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,float16,0,0.1678239901860555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,fp8,0,0.1625226636727651
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,fp8,0,0.16526400049527487
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,float16,0,0.16461333632469177
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,float16,0,0.1707680026690165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,fp8,0,0.16218133767445883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,float16,0,0.16884799798329672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,fp8,0,0.3158400058746338
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,fp8,0,0.16387200355529785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,fp8,0,0.16434666514396667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,float16,0,0.16517333189646402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,float16,0,0.09541866183280945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,fp8,0,0.08875733613967896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,float16,0,0.09334933757781982
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,float16,0,0.0920799970626831
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,fp8,0,0.08872532844543457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,float16,0,0.09290666381518047
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,fp8,0,0.08876799543698628
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,fp8,0,0.09064533313115437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,fp8,0,0.31413867076237995
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,float16,0,0.0558186670144399
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,float16,0,0.09095999598503113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,fp8,0,0.08871466914812724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,float16,0,0.05492266515890757
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,fp8,0,0.050000001986821495
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,float16,0,0.05211733281612396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,fp8,0,0.051551997661590576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,float16,0,0.0529120018084844
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,fp8,0,0.05054933329423269
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,float16,0,0.03143466760714849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,fp8,0,0.029050665597120922
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,float16,0,0.03133333226044973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,fp8,0,0.029338667790095013
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,fp8,0,0.029338667790095013
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,fp8,0,0.029509333272775013
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,float16,0,0.029088000456492107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,float16,0,0.031370667119820915
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,fp8,0,0.029152000943819683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,float16,0,0.02319466571013133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,fp8,0,0.021168000996112823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,float16,0,0.023141334454218548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,fp8,0,0.021168000996112823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,fp8,0,0.02094399929046631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,float16,0,0.021381333470344543
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,fp8,0,0.021253332495689392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,float16,0,0.02143999934196472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,fp8,0,0.05003199974695841
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,float16,0,0.014666666587193808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,fp8,0,0.01470400020480156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,float16,0,0.014869333555301031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,float16,0,0.014975999792416891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,fp8,0,0.05190933247407278
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,fp8,0,0.014826666563749313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,float16,0,0.014730667074521383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,fp8,0,0.01522133375207583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,float16,0,0.01482133318980535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,fp8,0,0.014842666685581207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,float16,0,0.0314026673634847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,float16,0,0.014778666198253632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,float16,0,0.015082667271296183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,fp8,0,0.014752000570297241
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,float16,0,0.014783999572197596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,float16,0,0.014933332800865173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,fp8,0,0.014709333578745524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,float16,0,0.38865065574645996
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,fp8,0,0.38537601629892987
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,float16,0,0.39109333356221515
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,fp8,0,0.3839679956436157
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,float16,0,0.3892853260040283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,float16,0,0.014922666052977243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,float16,0,0.3894666830698649
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,float16,0,0.023232000569502514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,fp8,0,0.3853013515472412
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,fp8,0,0.19868266582489014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,float16,0,0.20411733786265054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,fp8,0,0.19724265734354654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,float16,0,0.2021333376566569
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,float16,0,0.20018666982650757
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,fp8,0,0.1981653372446696
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,float16,0,0.19962666432062784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,fp8,0,0.19739733139673868
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,float16,0,0.20189332962036133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,fp8,0,0.1963040033976237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,float16,0,0.10518399874369304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,fp8,0,0.3855733474095662
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,fp8,0,0.10275200009346008
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,fp8,0,0.10321600238482158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,fp8,0,0.021210665504137676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,fp8,0,0.10430933038393657
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,float16,0,0.10707199573516846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,fp8,0,0.1014400025208791
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,float16,0,0.10492266217867534
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,float16,0,0.0580213318268458
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,float16,0,0.058117335041364036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,fp8,0,0.057775999108950295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,fp8,0,0.05784533421198527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,float16,0,0.061621333161989846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,fp8,0,0.05585066477457682
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,float16,0,0.06015466650327047
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,fp8,0,0.05625066657861074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,float16,0,0.05831466615200043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,fp8,0,0.056133334835370384
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,float16,0,0.03551466763019562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,float16,0,0.03562133262554804
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,fp8,0,0.03341866781314214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,float16,0,0.10685867071151733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,float16,0,0.03409066547950109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,fp8,0,0.033376000821590424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,float16,0,0.10523733496665955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,float16,0,0.03331733246644338
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,float16,0,0.03461333364248276
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,fp8,0,0.03369600077470144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,fp8,0,0.03356799980004629
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,fp8,0,0.10247466961542766
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,fp8,0,0.020794666061798733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,float16,0,0.020997333029905956
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,fp8,0,0.020949333906173706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,float16,0,0.021365332106749218
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,fp8,0,0.020949333906173706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,fp8,0,0.019354666272799175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,float16,0,0.021317332983016968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,float16,0,0.054511999090512596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,fp8,0,0.019472000499566395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,float16,0,0.015082667271296183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,fp8,0,0.015109332899252573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,float16,0,0.015002666662136713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,fp8,0,0.015109332899252573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,float16,0,0.01524266724785169
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,float16,0,0.015125333021084467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,float16,0,0.010901333143313726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,float16,0,0.010762666662534079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,float16,0,0.010762666662534079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,float16,0,0.010826667149861654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,fp8,0,0.010703999549150467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,float16,0,0.021226666867733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,float16,0,0.010687999427318573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,float16,0,0.010725333044926325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,float16,0,0.02096533278624217
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,float16,0,0.01101333275437355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,fp8,0,0.0106133334338665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,float16,0,0.01062400018175443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,float16,0,0.015029333531856537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,float16,0,0.011055999745925268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,fp8,0,0.30193599065144855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,float16,0,0.30799466371536255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,float16,0,0.3069760004679362
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,fp8,0,0.3028586705525716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,float16,0,0.010842667271693548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,fp8,0,0.3017173409461975
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,float16,0,0.30754133065541583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,fp8,0,0.3017759919166565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,float16,0,0.3071413238843282
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,float16,0,0.160480002562205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,float16,0,0.16150933504104614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,fp8,0,0.15601066748301187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,float16,0,0.16222400466601053
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,float16,0,0.16015467047691345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,fp8,0,0.15661333004633585
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,fp8,0,0.15676800409952799
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,fp8,0,0.15803200006484985
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,float16,0,0.0848586658636729
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,fp8,0,0.08261333405971527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,float16,0,0.08530132969220479
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,fp8,0,0.08240533371766408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,fp8,0,0.15434666474660239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,float16,0,0.08468266328175862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,fp8,0,0.08265066643555959
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,fp8,0,0.08269333342711131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,float16,0,0.08458667000134786
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,fp8,0,0.08083733419577281
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,float16,0,0.04743466774622599
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,float16,0,0.04786666731039683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,float16,0,0.04823466638724009
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,fp8,0,0.04569066564242045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,fp8,0,0.04572799801826477
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,fp8,0,0.045696000258127846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,fp8,0,0.045696000258127846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,float16,0,0.04744000236193339
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,float16,0,0.029167999823888142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,fp8,0,0.04566933214664459
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,float16,0,0.04983466863632202
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,fp8,0,0.02739199995994568
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,float16,0,0.028922667105992634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,fp8,0,0.02731200059254964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,fp8,0,0.027386667827765148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,fp8,0,0.027317332724730175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,float16,0,0.029120000700155895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,float16,0,0.02734400083621343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,fp8,0,0.027082666754722595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,float16,0,0.01732800031701724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,float16,0,0.16498667001724243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,fp8,0,0.016858667135238647
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,float16,0,0.01749333366751671
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,fp8,0,0.016794666647911072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,float16,0,0.08650133013725281
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,float16,0,0.01725333308180173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,float16,0,0.017450666675964992
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,float16,0,0.013760000467300415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,fp8,0,0.012725333372751871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,fp8,0,0.016832000265518825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,float16,0,0.013093333691358566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,float16,0,0.012847999731699625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,float16,0,0.028186666468779247
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,fp8,0,0.010703999549150467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,float16,0,0.01081066702802976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,fp8,0,0.010687999427318573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,float16,0,0.010805333654085795
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,fp8,0,0.010863999525705973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,float16,0,0.010944000134865442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,float16,0,0.017557332913080852
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,float16,0,0.010922666639089584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,float16,0,0.010618666807810465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,fp8,0,0.010549332946538925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,float16,0,0.009530666594703993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,float16,0,0.013354666531085968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,float16,0,0.009119999905427298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,float16,0,0.009082666908701261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,float16,0,0.013007999708255133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,float16,0,0.010389333590865135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,float16,0,0.27164800961812335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,fp8,0,0.26285332441329956
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,float16,0,0.27078932523727417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,fp8,0,0.010794666906197866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,fp8,0,0.26265066862106323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,fp8,0,0.26261333624521893
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,float16,0,0.26826133330663043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,float16,0,0.010634666929642359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,float16,0,0.14099733034769693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,float16,0,0.14138666788736978
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,fp8,0,0.13898666699727377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,float16,0,0.14268799622853598
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,fp8,0,0.13597333431243896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,fp8,0,0.13673067092895508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,float16,0,0.14154133200645447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,fp8,0,0.13598933815956116
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,float16,0,0.1413386662801107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,fp8,0,0.26522666215896606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,fp8,0,0.13571199774742126
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,fp8,0,0.07260266443093617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,fp8,0,0.07261866827805837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,fp8,0,0.07051200171311696
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,float16,0,0.0775679995616277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,float16,0,0.07807999849319458
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,fp8,0,0.07331199944019318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,float16,0,0.07678399980068207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,float16,0,0.07504533231258392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,float16,0,0.043712000052134194
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,fp8,0,0.03961066653331121
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,fp8,0,0.040618665516376495
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,fp8,0,0.040405333042144775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,float16,0,0.04387199878692627
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,float16,0,0.04385066529115041
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,float16,0,0.043824002146720886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,fp8,0,0.039690665900707245
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,float16,0,0.04343999922275543
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,float16,0,0.02550400048494339
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,fp8,0,0.02515733242034912
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,fp8,0,0.03997333347797394
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,fp8,0,0.025055999557177227
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,fp8,0,0.025402667621771496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,float16,0,0.27410133679707843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,float16,0,0.025226667523384094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,fp8,0,0.02489600082238515
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,float16,0,0.02629866699377696
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,fp8,0,0.02517866591612498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,float16,0,0.01699200024207433
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,fp8,0,0.016751999656359356
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,float16,0,0.017173333714405697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,fp8,0,0.016437333077192307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,fp8,0,0.01682666689157486
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,fp8,0,0.016250666230916977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,float16,0,0.01692266638080279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,fp8,0,0.016447999825080235
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,fp8,0,0.07107200225194295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,float16,0,0.012991999586423239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,float16,0,0.013130666067202887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,fp8,0,0.012661332885424295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,fp8,0,0.013248000293970108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,float16,0,0.012991999586423239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,float16,0,0.025461333493391674
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,fp8,0,0.012624000509579977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,float16,0,0.013023999830087027
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,fp8,0,0.012666666259368261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,float16,0,0.0780320018529892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,float16,0,0.01725333308180173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,fp8,0,0.008725333337982496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,float16,0,0.00933333362142245
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,float16,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,float16,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,float16,0,0.009088000282645226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,fp8,0,0.009072000160813332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,float16,0,0.009109333157539368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,float16,0,0.025455998877684276
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,float16,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,float16,0,0.009077333534757296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,float16,0,0.009530666594703993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,fp8,0,0.008821333448092142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,float16,0,0.01709866647919019
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,fp8,0,0.24619199832280478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,float16,0,0.26258132855097455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,float16,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,fp8,0,0.24714666604995728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,float16,0,0.013082666943470636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,float16,0,0.26280534267425537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,fp8,0,0.24612265825271606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,float16,0,0.2628320058186849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,fp8,0,0.2472320000330607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,float16,0,0.13928000132242838
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,fp8,0,0.12788266936937967
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,fp8,0,0.1290613313515981
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,float16,0,0.13929067055384317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,float16,0,0.14150933424631754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,fp8,0,0.12757866581281027
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,fp8,0,0.127402663230896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,float16,0,0.13794133067131042
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,float16,0,0.26102399826049805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,fp8,0,0.06836799780527751
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,float16,0,0.07669866581757863
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,float16,0,0.07650133470694225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,fp8,0,0.06876266499360402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,float16,0,0.07675200204054515
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,float16,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,fp8,0,0.06832000116507213
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,float16,0,0.07672533392906189
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,fp8,0,0.0682826687892278
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,float16,0,0.07266133526961009
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,fp8,0,0.0378506655494372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,float16,0,0.043141335248947144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,float16,0,0.043568000197410583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,fp8,0,0.03919466584920883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,float16,0,0.041834667325019836
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,fp8,0,0.0378560001651446
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,float16,0,0.04144533226887385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,float16,0,0.14468266566594443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,float16,0,0.04242666562398275
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,fp8,0,0.13105066617329916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,fp8,0,0.039477333426475525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,fp8,0,0.023333333432674408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,float16,0,0.025754667818546295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,float16,0,0.02658133457104365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,fp8,0,0.023311999936898548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,float16,0,0.025621332228183746
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,fp8,0,0.02349333216746648
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,float16,0,0.025008000433444977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,fp8,0,0.023237332701683044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,float16,0,0.01693333312869072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,float16,0,0.016693333784739178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,fp8,0,0.015109332899252573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,float16,0,0.017029333859682083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,fp8,0,0.06938666601975758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,fp8,0,0.014805333067973455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,float16,0,0.016858667135238647
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,float16,0,0.01310933381319046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,fp8,0,0.01369599997997284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,float16,0,0.01268799975514412
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,float16,0,0.013034666577974955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,fp8,0,0.03952533255020777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,fp8,0,0.012618667135636011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,float16,0,0.012949333836634954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,float16,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,fp8,0,0.024549332757790882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,fp8,0,0.00877333308259646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,float16,0,0.009296000003814697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,float16,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,float16,0,0.009119999905427298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,float16,0,0.016842667013406754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,float16,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,fp8,0,0.008709333216150602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,float16,0,0.009050666665037474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,fp8,0,0.00879466657837232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,float16,0,0.013183999806642532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,float16,0,0.025226667523384094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,1,128,1,float16,fp8,0,0.010698666175206503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,1,128,1,float16,float16,0,0.013088000317414602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,2,128,1,float16,float16,0,0.018906666586796444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,float16,0,0.010144000252087912
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,0,0.021498667697111767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,0,0.01728533332546552
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,0,0.02737066646416982
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,2,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,float16,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,2,128,1,float16,float16,0,0.01301866645614306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,0,0.02316266546646754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,2,128,1,float16,fp8,0,0.011626667032639185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,fp8,0,0.05566399792830149
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,float16,0,0.07487466434637706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,0,0.014938666174809137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,0,0.017173333714405697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,1,128,1,float16,fp8,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,1,128,1,float16,float16,0,0.009098666409651438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,2,128,1,float16,float16,0,0.009258666386206945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,float16,0,0.03958933303753535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,2,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,0,0.010693332801262537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,0,0.011514666179815928
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,0,0.010816000401973724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,1,128,1,float16,fp8,0,0.008789333204428354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,1,128,1,float16,float16,0,0.008634666601816813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,fp8,0,0.01899733394384384
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,float16,0,0.021920000513394673
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,0,0.008576000109314919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,0,0.007605333502093951
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,0,0.008832000195980072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,0,0.008693333094318708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,1,128,1,float16,float16,0,0.008672000219424566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,float16,0,0.015103999525308609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,fp8,0,0.031093334158261616
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,2,128,1,float16,float16,0,0.008693333094318708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,2,128,1,float16,fp8,0,0.008714666590094566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,0,0.007482666522264481
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,0,0.006927999978264173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,1,128,1,float16,float16,0,0.008629333227872849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,1,128,1,float16,fp8,0,0.008037333066264788
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,2,128,1,float16,float16,0,0.008703999842206636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,float16,0,0.01102399950226148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,2,128,1,float16,fp8,0,0.008570666735370954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,0,0.007178666690985362
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,0,0.00690133310854435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,0,0.006730666384100914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,2,128,1,float16,fp8,0,0.008527999743819237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,0,0.006751999879876773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,1,128,1,float16,float16,0,0.008639999975760778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,1,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,2,128,1,float16,float16,0,0.008687999720374743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,2,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,0,0.006773333375652631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,0,0.012346666306257248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,0,0.006837333242098491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,float16,0,0.008762666955590248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,0,0.006634666894872983
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,1,128,1,float16,float16,0,0.008682666967312494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,2,128,1,float16,float16,0,0.0086666668454806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,1,128,1,float16,fp8,0,0.007381333038210869
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,float16,0,0.007146666447321574
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,fp8,0,0.0068693334857622785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,0,0.006688000013430913
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,fp8,0,0.006666666517655055
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,float16,0,0.00725333330531915
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,1,128,1,float16,float16,0,0.00874133345981439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,2,128,1,float16,float16,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,2,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,0,0.007135999699433644
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,0,0.00707733320693175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,2,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,0,0.007018666714429855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,0,0.0069226666043202085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,2,128,1,float16,float16,0,0.0086666668454806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,1,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,fp8,0,12.677050272623697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,fp8,0,12.665354410807291
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,float16,0,13.711493174235025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,float16,0,15.322362263997396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,float16,0,17.102741241455078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,fp8,0,12.665888468424479
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,float16,0,13.712511698404947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,fp8,0,13.623029073079428
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,float16,0,6.940250396728516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,fp8,0,6.401573181152344
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,fp8,0,6.887162526448567
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,fp8,0,6.824848175048828
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,float16,0,9.767562866210938
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,float16,0,9.757551829020182
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,float16,0,9.752858479817709
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,fp8,0,6.796831766764323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,fp8,0,3.2783145904541016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,float16,0,3.5348374048868814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,fp8,0,6.393168131510417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,fp8,0,3.9378185272216797
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,float16,0,4.494026819864909
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,float16,0,9.600101470947266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,fp8,0,3.4629173278808594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,float16,0,3.539306640625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,float16,0,4.653370539347331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,fp8,0,3.4590988159179688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,fp8,0,3.4913225173950195
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,float16,0,2.086005369822184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,float16,0,1.8263519605000813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,fp8,0,2.265077273050944
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,fp8,0,1.7939947446187336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,fp8,0,1.7888587315877278
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,float16,0,2.6024853388468423
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,float16,0,2.2270453770955405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,fp8,0,1.8031946818033855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,float16,0,1.8498560587565105
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,fp8,0,1.8389280637105305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,float16,0,3.5277280807495117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,fp8,0,7.5294984181722
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,fp8,0,7.770693461100261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,float16,0,10.116533279418945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,fp8,0,7.7925599416097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,float16,0,9.594741185506185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,float16,0,7.920031865437825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,float16,0,11.15066146850586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,fp8,0,7.322383880615234
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,float16,0,4.018170674641927
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,fp8,0,3.956234614054362
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,fp8,0,3.9467360178629556
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,float16,0,5.350138982137044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,fp8,0,3.958767890930176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,float16,0,5.642533620198567
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,float16,0,5.31876277923584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,fp8,0,3.952890714009603
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,float16,0,2.0570079485575357
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,fp8,0,3.7174720764160156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,float16,0,4.008495966593425
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,fp8,0,2.149610678354899
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,float16,0,2.400378704071045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,fp8,0,2.7364587783813477
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,float16,0,2.872997283935547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,fp8,0,2.0236639976501465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,fp8,0,2.0248586336771646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,float16,0,2.5004639625549316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,float16,0,2.056111971537272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,fp8,0,2.082581361134847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,float16,0,1.2573706309000652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,float16,0,1.084549347559611
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,float16,0,1.4746346473693848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,fp8,0,1.1032640139261882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,float16,0,1.286405324935913
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,fp8,0,1.000421365102132
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,fp8,0,0.9966613451639811
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,float16,0,1.2949439684549968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,fp8,0,5.599280039469401
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,fp8,0,5.539632161458333
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,fp8,0,1.1534773508707683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,float16,0,7.998757044474284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,fp8,0,5.43008550008138
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,float16,0,6.042144139607747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,float16,0,7.873242696126302
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,fp8,0,2.9565601348876953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,float16,0,3.3869654337565103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,float16,0,3.060271898905436
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,float16,0,5.611536026000977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,float16,0,3.729386647542318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,fp8,0,5.194069226582845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,fp8,0,3.7849814097086587
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,fp8,0,2.82588259379069
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,fp8,0,0.9937706788380941
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,float16,0,3.223173459370931
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,fp8,0,2.8174721399943032
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,float16,0,2.849151929219564
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,float16,0,1.4707199732462566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,fp8,0,2.941690762837728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,float16,0,1.5631306966145833
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,fp8,0,1.557967980702718
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,fp8,0,1.4514719645182292
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,fp8,0,1.4513492584228516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,fp8,0,1.3939092953999836
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,float16,0,1.5601812998453777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,float16,0,1.4705813725789387
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,float16,0,0.8484106858571371
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,fp8,0,0.7664639949798584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,fp8,0,0.7204693158467611
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,float16,0,0.7797973155975342
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,fp8,0,0.7199680010477701
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,float16,0,0.8243146737416586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,fp8,0,0.7201759815216064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,float16,0,0.7769813537597656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,fp8,0,0.7176480293273926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,float16,0,1.5759040514628093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,fp8,0,1.4590080579121907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,float16,0,0.7800373236338297
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,float16,0,7.526880264282227
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,fp8,0,7.2729441324869795
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,float16,0,7.315034866333008
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,fp8,0,7.427226384480794
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,float16,0,7.315189361572266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,fp8,0,6.804634730021159
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,float16,0,3.7119468053181968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,float16,0,4.561205228169759
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,float16,0,7.323109308878581
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,fp8,0,3.44322141011556
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,fp8,0,7.098447799682617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,float16,0,3.6919946670532227
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,fp8,0,3.6687841415405273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,float16,0,3.6939465204874673
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,fp8,0,3.6735785802205405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,fp8,0,3.9175148010253906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,fp8,0,1.7517226537068684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,float16,0,1.8925600051879883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,float16,0,4.804613431294759
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,fp8,0,1.8666027386983235
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,float16,0,1.8840640385945637
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,fp8,0,1.8968106905619304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,float16,0,2.0124212900797525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,fp8,0,3.7341651916503906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,fp8,0,1.7472693125406902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,float16,0,2.4344746271769204
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,fp8,0,0.9676853020985922
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,float16,0,1.1903306643168132
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,fp8,0,1.866938591003418
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,fp8,0,0.9148533344268799
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,float16,0,0.9824000199635824
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,fp8,0,1.1670453548431396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,fp8,0,0.9677440325419108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,float16,0,1.1659253438313801
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,fp8,0,0.9077866872151693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,float16,0,0.9770613511403402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,float16,0,0.6123573382695516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,fp8,0,0.5183573166529337
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,float16,0,2.0276853243509927
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,float16,0,0.5235946575800577
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,fp8,0,0.516490658124288
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,float16,0,0.5283626715342203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,float16,0,0.5377440055211385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,float16,0,0.5603573322296143
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,fp8,0,0.4854773283004761
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,float16,0,1.0445760091145833
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,float16,0,4.359295845031738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,fp8,0,4.294346809387207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,fp8,0,4.012746810913086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,float16,0,4.303407986958821
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,fp8,0,0.5184373458226522
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,fp8,0,0.5194933414459229
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,fp8,0,4.011066754659017
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,float16,0,2.195141315460205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,fp8,0,2.037333329518636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,fp8,0,4.317525227864583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,float16,0,2.6578027407328286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,fp8,0,2.173130671183268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,float16,0,2.333786646525065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,fp8,0,2.170896053314209
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,float16,0,2.1854079564412436
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,fp8,0,2.18011204401652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,fp8,0,2.0306347211201987
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,float16,0,1.146789312362671
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,fp8,0,1.1118240356445312
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,float16,0,1.117695967356364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,fp8,0,1.0442773501078289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,fp8,0,1.0471839904785156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,float16,0,1.3441227277119954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,fp8,0,1.0398506323496501
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,float16,0,4.307482719421387
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,fp8,0,1.040992021560669
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,float16,0,0.6911199887593588
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,fp8,0,0.5477759838104248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,float16,0,0.6183626651763916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,float16,0,0.6731680234273275
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,float16,0,4.310463905334473
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,fp8,0,0.5989973147710165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,float16,0,0.6703306833902994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,float16,0,2.3926560084025064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,fp8,0,0.5453759829203287
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,float16,0,1.191226641337077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,float16,0,0.5924586852391561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,fp8,0,0.5846240123112997
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,float16,0,0.3696586688359578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,fp8,0,0.32310932874679565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,float16,0,0.3416586716969808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,fp8,0,0.3020266691843669
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,fp8,0,0.3229706684748332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,float16,0,0.3497600158055623
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,fp8,0,0.3218453327814738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,float16,0,1.1193973223368328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,fp8,0,0.29873067140579224
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,fp8,0,0.5832053422927856
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,fp8,0,3.8654025395711265
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,float16,0,4.116213480631511
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,fp8,0,3.858000119527181
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,float16,0,5.311525344848633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,float16,0,0.3267199993133545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,float16,0,4.121232032775879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,float16,0,0.32290132840474445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,fp8,0,1.9585973421732585
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,float16,0,4.125631968180339
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,fp8,0,3.8625866572062173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,float16,0,2.078144073486328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,fp8,0,3.861194610595703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,float16,0,2.3449652989705405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,fp8,0,2.079184055328369
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,fp8,0,2.0775839487711587
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,fp8,0,2.0758986473083496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,float16,0,2.082709312438965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,float16,0,2.5458985964457193
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,fp8,0,2.079978624979655
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,float16,0,1.0811413129170735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,float16,0,1.0596373081207275
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,fp8,0,0.9916213353474935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,float16,0,1.059706687927246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,fp8,0,1.150426705678304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,fp8,0,1.0590559641520183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,fp8,0,1.057802677154541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,float16,0,0.5723253488540649
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,fp8,0,0.5343466599782308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,fp8,0,0.549397349357605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,float16,0,0.6659466822942098
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,fp8,0,0.5565013488133749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,float16,0,0.551637331644694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,fp8,0,0.5498133500417074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,float16,0,0.5586026509602865
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,fp8,0,0.5156960090001425
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,fp8,0,0.9980959892272949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,float16,0,0.3178666631380717
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,float16,0,0.32529600461324054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,fp8,0,0.27852267026901245
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,float16,0,1.1293706893920898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,float16,0,0.3001013398170471
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,fp8,0,0.29690666993459064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,fp8,0,0.2983893354733785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,float16,0,0.29863999287287396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,fp8,0,0.2969599962234497
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,float16,0,1.062127987543742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,float16,0,0.18652800718943277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,float16,0,2.561610698699951
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,fp8,0,0.17042134205500284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,float16,0,0.18716800212860107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,float16,0,0.18397865692774454
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,fp8,0,0.15954132874806723
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,float16,0,0.17081065972646078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,fp8,0,0.1698933243751526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,float16,0,0.16887466112772623
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,fp8,0,0.15659733613332114
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,float16,0,2.4935733477274575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,float16,0,2.4943040211995444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,fp8,0,2.4984052975972495
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,fp8,0,2.34878937403361
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,fp8,0,0.29799999793370563
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,float16,0,0.6420480012893677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,fp8,0,2.349562644958496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,float16,0,2.9826294581095376
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,fp8,0,0.1713013251622518
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,float16,0,1.2871146996815999
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,float16,0,1.3501067161560059
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,fp8,0,1.2000906467437744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,fp8,0,1.5603200594584148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,fp8,0,1.2629173596700032
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,float16,0,1.27347199122111
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,fp8,0,1.190112034479777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,float16,0,1.314069350560506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,float16,0,0.3168639938036601
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,fp8,0,1.1905919710795085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,float16,0,0.6674559911092123
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,float16,0,0.7595626513163248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,fp8,0,0.6090613206227621
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,fp8,0,0.615610678990682
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,float16,0,0.7599413394927979
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,fp8,0,2.351642608642578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,float16,0,2.501530647277832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,fp8,0,0.6105013291041056
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,float16,0,0.7566933631896973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,fp8,0,0.648031989733378
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,float16,0,0.65120001633962
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,float16,0,0.38789868354797363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,fp8,0,0.6489493449529012
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,float16,0,0.3654719988505046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,float16,0,1.5289546648661296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,fp8,0,0.3227039972941081
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,float16,0,0.38839467366536456
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,float16,0,0.37089065710703534
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,float16,0,0.397546648979187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,fp8,0,0.32400532563527423
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,float16,0,0.21061867475509644
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,fp8,0,0.1768853267033895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,fp8,0,0.18646933635075888
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,float16,0,0.2070293426513672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,float16,0,0.20795732736587524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,fp8,0,0.18946667512257895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,fp8,0,0.1895093321800232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,float16,0,0.18876266479492188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,fp8,0,0.18782933553059897
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,float16,0,0.11958400408426921
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,fp8,0,0.10311466455459595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,float16,0,0.12190399567286174
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,fp8,0,0.10326932867368062
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,float16,0,0.11321600278218587
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,float16,0,0.12267733613650005
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,fp8,0,0.10928533474604289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,float16,0,0.11063466469446818
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,fp8,0,0.3415679931640625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,fp8,0,0.10905599594116211
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,fp8,0,0.34098132451375324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,float16,0,0.199237326780955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,float16,0,2.904927889506022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,fp8,0,2.5179893175760903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,float16,0,2.5141493479410806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,fp8,0,2.438543955485026
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,float16,0,2.516320069630941
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,fp8,0,0.3407040039698283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,fp8,0,2.3878560066223145
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,float16,0,1.2683253288269043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,float16,0,2.7287254333496094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,fp8,0,2.3889387448628745
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,fp8,0,1.251744031906128
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,fp8,0,1.2186240355173747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,float16,0,1.296730677286784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,float16,0,1.4916213353474934
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,fp8,0,1.2044373353322346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,float16,0,1.4604585965474446
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,fp8,0,1.2712213198343914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,float16,0,1.2740426858266194
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,float16,0,0.745317300160726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,fp8,0,1.2725173632303874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,float16,0,0.675706704457601
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,fp8,0,0.6536266803741455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,float16,0,0.6476960182189941
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,fp8,0,0.6200053294499716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,fp8,0,0.6153759956359863
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,float16,0,0.744650681813558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,float16,0,0.6552000045776367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,fp8,0,0.11096533139546712
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,float16,0,0.3373279968897502
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,fp8,0,0.33907731374104816
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,fp8,0,0.33499733606974286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,float16,0,0.3450346787770589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,fp8,0,0.3362933397293091
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,float16,0,0.3687146504720052
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,fp8,0,0.3199733297030131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,float16,0,0.364901343981425
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,float16,0,0.18307733535766602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,fp8,0,0.3216320077578227
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,fp8,0,0.184063990910848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,float16,0,0.18331199884414673
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,fp8,0,0.17889066537221274
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,fp8,0,0.1830293337504069
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,float16,0,0.18690133094787598
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,fp8,0,0.17352533340454102
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,float16,0,0.18330132961273193
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,fp8,0,0.17246933778127035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,float16,0,0.1072746713956197
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,fp8,0,0.09707732995351155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,float16,0,0.11619733770688374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,float16,0,0.10447999835014343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,fp8,0,0.10337600111961365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,fp8,0,0.1030560036500295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,fp8,0,0.6480480035146078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,fp8,0,0.103685329357783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,float16,0,0.10479999581972758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,fp8,0,0.10173867146174113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,fp8,0,0.6485066811243693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,float16,0,0.07144533097743988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,fp8,0,0.06425599753856659
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,float16,0,0.07043200234572093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,fp8,0,0.06400533517201741
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,float16,0,0.06855999926726024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,fp8,0,0.06433600187301636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,float16,0,0.37537066141764325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,fp8,0,0.06019733349482218
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,float16,0,0.06629333396752675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,float16,0,0.0643093337615331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,fp8,0,0.06362133224805196
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,fp8,0,1.5879680315653484
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,float16,0,0.1828213334083557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,float16,0,1.5917493502298992
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,float16,0,1.589626630147298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,fp8,0,1.5186187426249187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,float16,0,0.1048959990342458
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,fp8,0,1.594000021616618
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,float16,0,1.5955467224121094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,fp8,0,1.593839963277181
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,float16,0,0.8056480089823405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,fp8,0,0.8068426450093588
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,fp8,0,0.7775200208028158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,float16,0,0.8240160147349039
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,fp8,0,0.8052053451538086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,float16,0,0.9222880204518636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,float16,0,0.8077653249104818
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,fp8,0,0.8071573575337728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,float16,0,0.8094240029652914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,float16,0,1.788885275522868
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,fp8,0,0.7730560302734375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,float16,0,0.46889599164326984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,fp8,0,0.4156159957249959
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,float16,0,0.43397335211435956
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,fp8,0,0.41444798310597736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,float16,0,0.460373322168986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,float16,0,0.42331735293070477
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,float16,0,0.4168479839960734
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,fp8,0,0.39700265725453693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,float16,0,0.24701867500940958
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,fp8,0,0.21866132815678915
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,fp8,0,0.22101332743962607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,fp8,0,0.22009066740671793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,float16,0,0.23917333285013834
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,float16,0,0.22428266207377115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,float16,0,0.22085332870483398
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,fp8,0,0.21917865673700967
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,float16,0,0.13320533434549967
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,fp8,0,0.12074666221936543
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,fp8,0,0.11534933249155681
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,float16,0,0.1321333348751068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,fp8,0,0.11986133456230164
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,float16,0,0.1302079955736796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,fp8,0,0.12082133690516154
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,fp8,0,0.12090133627255757
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,float16,0,0.12286399801572163
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,float16,0,0.07239999870459239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,fp8,0,0.06795733173688252
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,fp8,0,0.3990186850229899
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,fp8,0,0.07225599884986877
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,float16,0,0.08083733419577281
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,fp8,0,0.07150400181611379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,float16,0,0.07772266864776611
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,fp8,0,0.07147733370463054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,fp8,0,0.07126933336257935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,float16,0,0.07133333384990692
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,float16,0,0.2196213404337565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,float16,0,0.04585599899291992
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,fp8,0,0.04347200194994608
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,float16,0,0.046256000796953835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,fp8,0,0.0461706668138504
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,float16,0,0.04677866895993551
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,float16,0,0.046256000796953835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,fp8,0,0.046495998899141945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,fp8,0,0.04346133271853129
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,float16,0,0.046053335070610046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,float16,0,0.13648533821105957
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,float16,0,1.8496267000834148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,fp8,0,0.41475733121236164
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,fp8,0,1.6490186055501301
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,float16,0,0.07540266712506612
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,float16,0,1.751296043395996
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,fp8,0,1.6495572725931804
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,fp8,0,0.21201600631078085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,fp8,0,0.043663998444875084
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,float16,0,1.7169547080993652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,fp8,0,1.6535785992940266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,float16,0,1.8967199325561523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,fp8,0,1.651477336883545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,float16,0,0.8958186308542887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,fp8,0,0.8408586978912354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,float16,0,0.8834239641825358
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,fp8,0,0.8337600231170654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,fp8,0,0.8335359891255697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,float16,0,0.9549492994944254
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,float16,0,0.9619733492533366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,fp8,0,0.8338399728139242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,float16,0,0.9002986749013265
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,fp8,0,0.8352693716684977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,float16,0,0.4517013231913249
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,float16,0,0.4708746671676636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,fp8,0,0.4296533266703288
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,float16,0,0.4530080159505208
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,float16,0,0.4769066572189331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,fp8,0,0.42557867368062335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,float16,0,0.4764853318532308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,fp8,0,0.42471468448638916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,fp8,0,0.442906657854716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,fp8,0,0.22187199195226034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,float16,0,0.25098133087158203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,fp8,0,0.2209440072377523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,float16,0,0.2495573361714681
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,fp8,0,0.22331732511520386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,fp8,0,0.23053866624832153
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,float16,0,0.26494399706522626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,fp8,0,0.22155733903249106
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,float16,0,0.23242133855819702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,float16,0,0.13732799887657166
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,fp8,0,0.1253653367360433
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,float16,0,0.12485866745313008
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,fp8,0,0.12559466560681662
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,fp8,0,0.12127466996510823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,float16,0,0.12533332904179892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,fp8,0,0.12660800417264303
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,fp8,0,0.44179201126098633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,fp8,0,0.0713973343372345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,float16,0,0.07724800209204356
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,fp8,0,0.07128533224264781
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,fp8,0,0.07041066884994507
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,float16,0,0.07509866853555043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,float16,0,0.24125333627065024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,fp8,0,0.07036266724268596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,float16,0,0.07654400169849396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,fp8,0,0.07045866549015045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,float16,0,0.14198933045069376
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,float16,0,0.04710933566093445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,fp8,0,0.11958400408426921
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,float16,0,0.0450186679760615
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,fp8,0,0.04062933226426443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,float16,0,0.045791998505592346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,float16,0,0.12662933270136514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,fp8,0,0.04327466587225596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,float16,0,0.0458186666170756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,float16,0,0.043151999513308205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,fp8,0,0.043568000197410583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,float16,0,0.033344000577926636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,float16,0,0.07377600173155467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,fp8,0,0.031514666974544525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,float16,0,0.03339733431736628
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,float16,0,0.07648533085982005
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,fp8,0,0.02977066735426585
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,fp8,0,0.03133333226044973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,float16,0,0.03146666785081228
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,fp8,0,0.029359998802344005
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,fp8,0,0.042453333735466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,fp8,0,1.3081973393758137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,float16,0,1.301477352778117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,fp8,0,0.04168533285458883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,fp8,0,0.03120533376932144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,float16,0,0.031397332747777305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,fp8,0,1.3067893187204997
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,fp8,0,1.2767893473307292
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,float16,0,1.3078986803690593
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,float16,0,1.332693338394165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,float16,0,0.6871253649393717
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,float16,0,0.6764799753824869
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,fp8,0,0.6478186845779419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,float16,0,0.6829120318094889
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,fp8,0,1.2765653133392334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,fp8,0,0.6599786678949991
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,float16,0,0.6757653554280599
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,float16,0,0.03330666571855545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,float16,0,1.3042293389638264
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,fp8,0,0.6600106557210287
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,fp8,0,0.6610986789067587
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,float16,0,0.6902026335398356
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,fp8,0,0.6606826782226562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,fp8,0,0.3303893407185872
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,float16,0,0.3436160087585449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,float16,0,0.3369866609573364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,fp8,0,0.33112533887227374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,float16,0,0.3386293252309163
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,float16,0,0.35066668192545575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,fp8,0,0.33839468161265057
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,fp8,0,0.33007999261220294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,fp8,0,0.1772800087928772
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,fp8,0,0.17812265952428183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,float16,0,0.18093866109848022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,float16,0,0.3364959955215454
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,fp8,0,0.17730132738749185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,float16,0,0.1796906590461731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,fp8,0,0.17164800564448038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,float16,0,0.17721599340438843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,fp8,0,0.1715893348058065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,float16,0,0.09671466549237569
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,fp8,0,0.09289066990216573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,float16,0,0.09591999650001526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,fp8,0,0.09379200140635173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,float16,0,0.09930133819580078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,float16,0,0.09514133135477702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,fp8,0,0.09247466921806335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,float16,0,0.09643733501434326
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,fp8,0,0.09782399733861287
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,fp8,0,0.0537120004494985
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,fp8,0,0.33825067679087323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,float16,0,0.18605866034825644
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,float16,0,0.05599466462930044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,float16,0,0.05795733133951823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,float16,0,0.1762453317642212
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,fp8,0,0.05332266787687937
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,float16,0,0.05792533357938131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,fp8,0,0.053946668903032936
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,float16,0,0.05603733162085215
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,fp8,0,0.052426666021347046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,float16,0,0.036229332288106285
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,fp8,0,0.03319466610749563
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,fp8,0,0.033589333295822144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,float16,0,0.03562666724125544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,float16,0,0.03526400029659271
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,float16,0,0.03558400024970373
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,fp8,0,0.03358400116364161
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,float16,0,0.033520000676314034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,fp8,0,0.0335413341720899
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,fp8,0,0.09660800298055013
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,float16,0,0.026399999856948853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,fp8,0,0.025258667767047882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,float16,0,0.025637333591779072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,float16,0,0.025290665527184803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,fp8,0,0.025258667767047882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,float16,0,0.026373334228992462
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,fp8,0,0.02499733368555705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,fp8,0,0.025087999800841015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,float16,0,0.05815466741720835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,float16,0,0.02513066679239273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,float16,0,0.018277333428462345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,fp8,0,0.05385066568851471
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,float16,0,0.01894933357834816
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,fp8,0,0.016757333030303318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,float16,0,0.01749333366751671
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,fp8,0,0.017184000462293625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,float16,0,0.01883200059334437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,fp8,0,0.03330666571855545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,fp8,0,0.5495253403981527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,float16,0,0.5433493455251058
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,float16,0,0.5435733397801717
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,fp8,0,0.025253333151340485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,fp8,0,0.5434026718139648
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,fp8,0,0.5483413139979044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,float16,0,0.017194667210181553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,float16,0,0.31572800874710083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,float16,0,0.5448906819025675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,float16,0,0.5453386704126993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,fp8,0,0.5435733397801717
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,fp8,0,0.2772480050722758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,float16,0,0.2773226698239644
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,fp8,0,0.27727999289830524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,float16,0,0.28751466671625775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,fp8,0,0.28197866678237915
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,float16,0,0.1530026694138845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,fp8,0,0.14681599537531534
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,float16,0,0.27799467245737713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,fp8,0,0.14611732959747314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,float16,0,0.15081600348154703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,float16,0,0.15030399958292642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,float16,0,0.27874133984247845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,fp8,0,0.2799253265062968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,fp8,0,0.27715200185775757
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,fp8,0,0.14607466260592142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,fp8,0,0.14621866742769876
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,float16,0,0.14761066436767578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,fp8,0,0.07681599756081899
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,float16,0,0.0821973333756129
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,float16,0,0.14707733194033304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,float16,0,0.07834133505821228
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,fp8,0,0.07855466504891713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,fp8,0,0.07898133496443431
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,fp8,0,0.07821866869926453
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,float16,0,0.07962133487065633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,float16,0,0.078575998544693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,float16,0,0.08105066418647766
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,fp8,0,0.07646400233109792
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,float16,0,0.04756799836953481
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,float16,0,0.046538665890693665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,fp8,0,0.04574400186538696
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,fp8,0,0.04610666632652283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,fp8,0,0.045797333121299744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,fp8,0,0.04562133550643921
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,float16,0,0.047509332497914634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,fp8,0,0.04580800235271454
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,float16,0,0.04770666857560476
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,float16,0,0.031210665901501972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,fp8,0,0.029365333418051403
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,float16,0,0.031178665657838184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,fp8,0,0.029146666328112285
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,float16,0,0.03014400104681651
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,fp8,0,0.030218665798505146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,float16,0,0.03141333411137263
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,fp8,0,0.02941333254178365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,float16,0,0.029717333614826202
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,fp8,0,0.029343999922275543
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,float16,0,0.023178666830062866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,float16,0,0.023039999107519787
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,fp8,0,0.021338666478792827
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,float16,0,0.023344000180562336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,fp8,0,0.02124800036350886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,float16,0,0.021130666136741638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,float16,0,0.022970666488011677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,fp8,0,0.02128533273935318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,float16,0,0.014970666418472925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,fp8,0,0.014762666076421738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,float16,0,0.014853333433469137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,fp8,0,0.014746667196353277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,float16,0,0.014688000082969666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,fp8,0,0.014794666320085526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,float16,0,0.014794666320085526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,float16,0,0.049733335773150124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,float16,0,0.01523200049996376
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,float16,0,0.01482133318980535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,fp8,0,0.01482133318980535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,float16,0,0.014912000546852747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,fp8,0,0.014639999717473984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,float16,0,0.01482133318980535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,fp8,0,0.015002666662136713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,fp8,0,0.021151999632517498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,float16,0,0.014842666685581207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,fp8,0,0.01469333345691363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,fp8,0,0.14615999658902487
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,fp8,0,0.021386665602525074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,float16,0,0.3386773268381755
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,fp8,0,0.33643198013305664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,float16,0,0.33829331398010254
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,fp8,0,0.3368266820907593
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,float16,0,0.338703989982605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,float16,0,0.014917333920796713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,fp8,0,0.3345173199971517
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,float16,0,0.17482666174570718
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,float16,0,0.3385653495788574
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,float16,0,0.17481066783269247
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,fp8,0,0.3366080125172933
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,float16,0,0.17616534233093262
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,fp8,0,0.17229332526524863
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,fp8,0,0.17249067624409994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,float16,0,0.1745120088259379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,float16,0,0.17463467518488565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,float16,0,0.09384533762931824
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,fp8,0,0.17339199781417847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,fp8,0,0.17443199952443442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,fp8,0,0.0926026701927185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,float16,0,0.09750933448473613
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,fp8,0,0.09231999516487122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,float16,0,0.09493866562843323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,float16,0,0.09571199615796407
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,fp8,0,0.09278933207194011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,float16,0,0.09292800227801006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,float16,0,0.051818668842315674
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,fp8,0,0.05000533163547516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,float16,0,0.05212266743183136
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,float16,0,0.05189333359400431
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,float16,0,0.051813334226608276
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,fp8,0,0.04970133304595947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,float16,0,0.051327998439470925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,fp8,0,0.04971200227737427
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,float16,0,0.031146667897701263
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,fp8,0,0.17273600896199545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,fp8,0,0.029861333469549816
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,float16,0,0.03123733401298523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,float16,0,0.03156800071398417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,fp8,0,0.029290666182835896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,fp8,0,0.029472000896930695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,float16,0,0.03072533259789149
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,fp8,0,0.0295413335164388
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,fp8,0,0.092549333969752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,float16,0,0.029525332152843475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,fp8,0,0.029504001140594482
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,float16,0,0.020938667158285778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,fp8,0,0.019834666202465694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,fp8,0,0.020549333343903225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,fp8,0,0.09357866644859314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,float16,0,0.021045332153638203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,float16,0,0.020949333906173706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,fp8,0,0.020517333100239437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,fp8,0,0.019626667102177937
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,fp8,0,0.019493332753578823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,float16,0,0.02123733361562093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,fp8,0,0.04982399940490723
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,float16,0,0.015674666812022526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,float16,0,0.014933332800865173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,fp8,0,0.015002666662136713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,float16,0,0.015002666662136713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,fp8,0,0.014938666174809137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,float16,0,0.015024000157912573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,fp8,0,0.01611199975013733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,fp8,0,0.01098666712641716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,float16,0,0.010725333044926325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,float16,0,0.010890666395425797
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,float16,0,0.01097600037852923
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,float16,0,0.010954666882753372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,float16,0,0.010933333386977514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,float16,0,0.021226666867733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,fp8,0,0.04971200227737427
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,fp8,0,0.010757333288590113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,float16,0,0.011071999867757162
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,float16,0,0.01091733326514562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,fp8,0,0.010741333166758219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,float16,0,0.015024000157912573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,float16,0,0.01099733387430509
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,float16,0,0.010645333677530289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,fp8,0,0.010656000425418219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,float16,0,0.01098666712641716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,fp8,0,0.010693332801262537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,float16,0,0.2648319999376933
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,fp8,0,0.25878934065500897
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,float16,0,0.263482669989268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,fp8,0,0.2586666742960612
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,fp8,0,0.25909332434336346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,float16,0,0.262773334980011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,fp8,0,0.2587839961051941
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,float16,0,0.1384160021940867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,fp8,0,0.1344480017820994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,float16,0,0.2627519965171814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,float16,0,0.13714133699735007
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,fp8,0,0.13598933815956116
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,fp8,0,0.1339466671148936
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,float16,0,0.13598933815956116
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,fp8,0,0.01081066702802976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,fp8,0,0.13369066516558328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,float16,0,0.07403199871381123
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,fp8,0,0.07239466905593872
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,float16,0,0.07478400071461995
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,fp8,0,0.07282133400440216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,fp8,0,0.07114666700363159
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,float16,0,0.07578666508197784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,fp8,0,0.07237333556016286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,float16,0,0.07547733187675476
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,float16,0,0.07458666463692983
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,fp8,0,0.07229333122571309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,float16,0,0.0417546679576238
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,float16,0,0.04151466737190882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,fp8,0,0.03951466580231985
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,fp8,0,0.03959999978542328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,float16,0,0.03963200002908707
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,fp8,0,0.039594667653242745
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,float16,0,0.03965866565704346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,fp8,0,0.039359999199708305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,float16,0,0.02496533344189326
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,float16,0,0.0252960001428922
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,fp8,0,0.02463999887307485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,fp8,0,0.02327466756105423
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,fp8,0,0.024346667031447094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,float16,0,0.13691733280817667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,float16,0,0.025237334271272022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,fp8,0,0.13485866785049438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,fp8,0,0.024458666642506916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,fp8,0,0.023658665517965954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,float16,0,0.02521066615978877
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,float16,0,0.0169813334941864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,float16,0,0.016970666746298473
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,fp8,0,0.017125333348910015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,fp8,0,0.016895999511082966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,float16,0,0.01706133286158244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,float16,0,0.016879999389251072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,fp8,0,0.016810666769742966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,fp8,0,0.01691199963291486
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,float16,0,0.012981332838535309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,float16,0,0.012784000486135483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,float16,0,0.012736000120639801
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,float16,0,0.025173333783944447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,float16,0,0.013002666334311167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,float16,0,0.010559999694426855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,fp8,0,0.010608000059922537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,fp8,0,0.01098666712641716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,float16,0,0.009242666885256767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,fp8,0,0.01714133347074191
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,float16,0,0.009573333586255709
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,fp8,0,0.010586666564146677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,float16,0,0.010858666151762009
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,fp8,0,0.009061333412925402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,float16,0,0.01721599946419398
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,float16,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,float16,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,float16,0,0.010826667149861654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,fp8,0,0.009535999968647957
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,float16,0,0.010650667051474253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,fp8,0,0.010064000263810158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,float16,0,0.00922133338948091
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,float16,0,0.13780267039934793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,float16,0,0.22733867168426514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,fp8,0,0.2222879926363627
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,float16,0,0.22970134019851685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,float16,0,0.010714666297038397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,fp8,0,0.22323733568191528
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,float16,0,0.04154666761557261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,float16,0,0.227183997631073
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,fp8,0,0.22220265865325928
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,float16,0,0.22719999154408774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,fp8,0,0.22190932432810465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,float16,0,0.12013333042462666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,fp8,0,0.11587199568748474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,float16,0,0.12027200063069661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,fp8,0,0.11582400401433308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,float16,0,0.12225066622098286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,float16,0,0.11955199639002483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,fp8,0,0.11525866389274597
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,fp8,0,0.11661332845687866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,fp8,0,0.06237333516279856
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,float16,0,0.06419200201829274
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,float16,0,0.06603200236956279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,fp8,0,0.06372799972693126
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,float16,0,0.06410666803518932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,fp8,0,0.062277331948280334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,fp8,0,0.039477333426475525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,float16,0,0.06742933392524719
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,fp8,0,0.0621066689491272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,fp8,0,0.06214400132497152
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,float16,0,0.0639626681804657
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,fp8,0,0.03428266694148382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,float16,0,0.03770133356253306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,float16,0,0.037274666130542755
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,float16,0,0.037205333511034645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,fp8,0,0.03532266616821289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,float16,0,0.03598399957021078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,fp8,0,0.03459733227888743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,fp8,0,0.03390933324893316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,float16,0,0.03620799879233042
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,float16,0,0.023152001202106476
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,fp8,0,0.11752532919247945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,fp8,0,0.021221332252025604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,float16,0,0.023237332701683044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,fp8,0,0.021130666136741638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,float16,0,0.023056000471115112
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,float16,0,0.023189333577950794
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,fp8,0,0.021216000119845074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,float16,0,0.022970666488011677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,fp8,0,0.021226666867733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,float16,0,0.015061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,float16,0,0.12006933490435283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,fp8,0,0.014805333067973455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,float16,0,0.01685333376129468
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,fp8,0,0.015189333508412043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,float16,0,0.015471999843915304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,fp8,0,0.015013333410024643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,float16,0,0.015130666395028433
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,float16,0,0.012874666601419449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,fp8,0,0.012773333738247553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,float16,0,0.012901333471139273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,fp8,0,0.0352906659245491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,float16,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,float16,0,0.009189333145817121
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,float16,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,fp8,0,0.021354667842388153
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,float16,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,float16,0,0.010069333637754122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,float16,0,0.015135999768972397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,float16,0,0.013013333082199097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,float16,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,float16,0,0.01301866645614306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,float16,0,0.012794667234023413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,float16,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,float16,0,0.22376000881195068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,float16,0,0.2212053338686625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,fp8,0,0.20754132668177286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,float16,0,0.22209066152572632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,fp8,0,0.2076639930407206
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,fp8,0,0.009119999905427298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,float16,0,0.21975467602411905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,fp8,0,0.2086720069249471
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,float16,0,0.1174720029036204
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,fp8,0,0.10939733187357585
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,float16,0,0.11664000153541565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,fp8,0,0.1086293359597524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,fp8,0,0.2076639930407206
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,fp8,0,0.10796266794204712
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,float16,0,0.11564800143241882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,fp8,0,0.1088800032933553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,float16,0,0.11703466375668843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,fp8,0,0.10730666915575664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,float16,0,0.06603200236956279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,fp8,0,0.057999998331069946
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,fp8,0,0.05806399881839752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,float16,0,0.06554133196671803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,float16,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,float16,0,0.06457599997520447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,fp8,0,0.060133333007494606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,float16,0,0.06224533418814341
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,fp8,0,0.05979733169078827
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,float16,0,0.06225599845250448
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,fp8,0,0.05807999769846598
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,fp8,0,0.033344000577926636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,float16,0,0.03729599962631861
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,fp8,0,0.033546666304270424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,float16,0,0.11600533127784729
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,fp8,0,0.033386667569478355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,fp8,0,0.033413333197434746
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,float16,0,0.035589332381884255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,fp8,0,0.03347733368476232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,float16,0,0.022629333039124806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,fp8,0,0.02110933264096578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,float16,0,0.02327466756105423
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,fp8,0,0.02102400114138921
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,fp8,0,0.021146667500336964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,float16,0,0.02163733293612798
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,fp8,0,0.021210665504137676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,float16,0,0.022815999885400135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,fp8,0,0.02111999938885371
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,float16,0,0.015109332899252573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,fp8,0,0.014938666174809137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,float16,0,0.015279999623696009
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,fp8,0,0.015130666395028433
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,float16,0,0.01505600040157636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,fp8,0,0.015072000523408255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,float16,0,0.014970666418472925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,float16,0,0.03757333258787791
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,fp8,0,0.012671999633312225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,float16,0,0.012954667210578918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,float16,0,0.03736533224582672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,fp8,0,0.011936000237862269
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,float16,0,0.012938667088747025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,fp8,0,0.011829332758982977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,fp8,0,0.01198400060335795
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,float16,0,0.009216000015536943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,float16,0,0.022976001103719074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,float16,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,float16,0,0.015087999403476715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,float16,0,0.009162666896979014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,float16,0,0.009248000259200731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,float16,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,float16,0,0.03702933341264725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,fp8,0,0.008799999952316284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,float16,0,0.013077333569526672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,float16,0,0.012794667234023413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,1,128,1,float16,float16,0,0.012026666353146235
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,1,128,1,float16,fp8,0,0.010714666297038397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,2,128,1,float16,float16,0,0.018778666853904724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,float16,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,2,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,0,0.021210665504137676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,0,0.01842133328318596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,0,0.02313599983851115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,1,128,1,float16,float16,0,0.009328000247478485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,fp8,0,0.04781866570313772
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,float16,0,0.06417599817117055
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,1,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,2,128,1,float16,float16,0,0.012693333129088083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,2,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,0,0.012650666137536367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,0,0.015034666905800501
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,0,0.017231999586025875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,1,128,1,float16,float16,0,0.008592000231146812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,1,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,float16,0,0.03286933402220408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,2,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,fp8,0,0.02701866626739502
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,0,0.010735999792814255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,float16,0,0.00874133345981439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,0,0.011445333560307821
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,1,128,1,float16,float16,0,0.008570666735370954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,2,128,1,float16,float16,0,0.008736000085870424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,1,128,1,float16,fp8,0,0.007994666695594788
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,0,0.027376001079877216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,float16,0,0.01918399954835574
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,0,0.008672000219424566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,0,0.008527999743819237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,1,128,1,float16,float16,0,0.008789333204428354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,2,128,1,float16,float16,0,0.008826666822036108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,float16,0,0.012986666212479273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,2,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,0,0.007071999832987785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,0,0.006911999856432279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,2,128,1,float16,float16,0,0.009408000235756239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,1,128,1,float16,float16,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,1,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,2,128,1,float16,float16,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,float16,0,0.010901333143313726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,2,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,0,0.007120000198483467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,2,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,1,128,1,float16,fp8,0,0.0069333333522081375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,float16,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,0,0.00707733320693175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,2,128,1,float16,float16,0,0.008639999975760778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,1,128,1,float16,fp8,0,0.007242666557431221
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,2,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,0,0.006981333096822103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,0,0.0068693334857622785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,1,128,1,float16,float16,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,1,128,1,float16,fp8,0,0.006784000123540561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,2,128,1,float16,float16,0,0.008746666833758354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,float16,0,0.006911999856432279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,0,0.008767999708652496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,2,128,1,float16,fp8,0,0.008581333483258883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,0,0.007029333462317784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,0,0.007018666714429855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,0,0.006725333631038666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,fp8,0,0.006842666616042455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,float16,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,1,128,1,float16,float16,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,2,128,1,float16,float16,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,1,128,1,float16,float16,0,0.008826666822036108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,2,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,0,0.006895999734600385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,0,0.0069226666043202085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,0,0.006895999734600385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,0,0.006725333631038666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,fp8,0,0.006757333253820737
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,0,0.006704000135262807
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,1,128,1,float16,fp8,0,0.007002666592597961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,fp8,0,9.969850540161133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,float16,0,10.847727457682291
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,fp8,0,9.968079884847006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,float16,0,10.844192504882812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,fp8,0,9.97109858194987
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,float16,0,10.849077860514322
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,float16,0,5.496357599894206
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,fp8,0,10.534298578898111
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,fp8,0,5.054309209187825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,fp8,0,5.3898665110270185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,float16,0,7.448575973510742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,float16,0,5.484767913818359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,fp8,0,5.368213017781575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,float16,0,14.487743377685547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,float16,0,5.486869176228841
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,float16,0,2.949199994405111
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,float16,0,5.491039911905925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,float16,0,2.8675947189331055
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,fp8,0,5.39299201965332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,fp8,0,2.744506518046061
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,fp8,0,2.5817012786865234
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,fp8,0,2.7634239196777344
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,float16,0,2.8216638565063477
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,float16,0,2.852325439453125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,float16,0,1.7736320495605469
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,fp8,0,1.440874735514323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,float16,0,1.5333280563354492
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,fp8,0,1.352677345275879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,fp8,0,2.5834666887919107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,fp8,0,1.4533440272013347
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,float16,0,1.782709280649821
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,fp8,0,1.3518719673156738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,float16,0,1.5340906778971355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,float16,0,1.4693652788798015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,fp8,0,1.4500800768534343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,float16,0,3.542048136393229
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,fp8,0,2.7455625534057617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,fp8,0,5.740634918212891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,fp8,0,5.368757247924805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,float16,0,6.231903711954753
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,fp8,0,6.035717646280925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,float16,0,8.85647455851237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,float16,0,6.248405456542969
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,fp8,0,6.121039708455403
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,fp8,0,3.11405340830485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,float16,0,3.175312042236328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,fp8,0,3.1104211807250977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,float16,0,6.871632258097331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,fp8,0,6.133701324462891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,float16,0,3.189711888631185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,fp8,0,2.9116481145222983
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,float16,0,1.6337439219156902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,fp8,0,3.1097545623779297
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,float16,0,3.810154596964518
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,fp8,0,1.5080374081929524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,fp8,0,3.2777013778686523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,fp8,0,1.5003360112508137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,float16,0,1.7461493810017903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,fp8,0,1.79640531539917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,float16,0,1.6347467104593914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,fp8,0,1.650394598642985
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,float16,0,4.4810028076171875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,float16,0,1.6347306569417317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,fp8,0,1.501205285390218
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,float16,0,0.894490639368693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,fp8,0,0.7967519760131836
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,float16,0,0.9230773448944092
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,fp8,0,0.7940266927083334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,float16,0,0.9316533406575521
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,fp8,0,0.8478773434956869
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,fp8,0,0.8489813009897867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,float16,0,0.9202986558278402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,float16,0,1.7295200030008953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,float16,0,3.469674746195475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,fp8,0,4.061018625895183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,fp8,0,4.060442606608073
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,float16,0,0.9432799816131592
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,float16,0,5.680165608723958
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,fp8,0,0.849402666091919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,float16,0,5.851088205973308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,float16,0,5.048362731933594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,float16,0,2.744138717651367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,float16,0,2.254021326700846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,fp8,0,2.491173267364502
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,float16,0,4.406911849975586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,fp8,0,4.174218813578288
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,float16,0,2.8084214528401694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,fp8,0,2.8021227518717446
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,float16,0,2.246234734853109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,fp8,0,2.2727413177490234
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,fp8,0,2.109498659769694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,float16,0,2.246335983276367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,fp8,0,1.1574933528900146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,float16,0,1.3224746386210124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,fp8,0,4.364426612854004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,float16,0,1.4489439328511555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,fp8,0,1.1431732972462971
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,float16,0,1.3368959426879883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,fp8,0,1.1003200213114421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,float16,0,1.4476586977640789
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,float16,0,0.6852586269378662
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,float16,0,0.6296000083287557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,fp8,0,0.5848373174667358
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,float16,0,0.6435999870300293
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,fp8,0,0.6129973332087199
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,fp8,0,0.611786683400472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,float16,0,0.7438986301422119
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,float16,0,0.6259200175603231
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,fp8,0,2.4991040229797363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,fp8,0,1.1785120169321697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,float16,0,1.2357280254364014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,fp8,0,1.2744906743367512
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,fp8,0,0.6134346723556519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,fp8,0,5.543408075968425
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,float16,0,7.323498407999675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,fp8,0,5.2613067626953125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,fp8,0,0.608458677927653
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,float16,0,7.9159730275472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,float16,0,5.715973536173503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,fp8,0,5.264570554097493
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,float16,0,3.1339521408081055
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,float16,0,3.0819679896036782
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,fp8,0,3.2633705139160156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,fp8,0,2.6705652872721353
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,float16,0,2.8927412033081055
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,float16,0,7.162533442179362
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,fp8,0,2.854698816935221
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,fp8,0,2.8821706771850586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,float16,0,1.4886399904886882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,fp8,0,2.6698452631632485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,fp8,0,1.5890560150146484
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,float16,0,2.914560000101725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,fp8,0,1.4651360511779785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,fp8,0,1.458661397298177
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,float16,0,1.8328800201416016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,float16,0,1.7485599517822266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,fp8,0,1.4579307238260906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,fp8,0,5.263962745666504
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,float16,0,1.4826666514078777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,fp8,0,1.5992053349812825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,float16,0,0.8092426458994547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,float16,0,0.9359893004099528
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,fp8,0,0.7158719698588053
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,float16,0,0.8114133675893148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,fp8,0,0.7854186693827311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,float16,0,0.8287786642710367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,float16,0,3.507626533508301
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,fp8,0,0.7639413674672445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,float16,0,1.6303359667460124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,float16,0,0.790229320526123
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,fp8,0,0.7855306466420492
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,fp8,0,0.41496535142262775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,float16,0,0.4375893274943034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,fp8,0,0.41832534472147626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,float16,0,0.4322506586710612
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,float16,0,0.4543999830881755
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,fp8,0,0.3949439922968547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,float16,0,0.48711999257405597
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,fp8,0,0.4182720184326172
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,float16,0,3.525871912638346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,fp8,0,3.0742292404174805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,fp8,0,0.7630933125813802
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,fp8,0,3.075450579325358
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,float16,0,3.8946186701456704
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,float16,0,0.4259573221206665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,fp8,0,0.42717333634694415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,fp8,0,3.0753173828125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,float16,0,3.714655876159668
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,float16,0,3.3429387410481772
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,float16,0,1.8653546969095867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,fp8,0,3.0771306355794272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,float16,0,1.8669333457946777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,fp8,0,1.5709706942240398
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,fp8,0,1.5622827212015789
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,float16,0,1.6966346104939778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,fp8,0,1.7634453773498535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,float16,0,2.0936907132466636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,fp8,0,1.7099626859029133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,float16,0,1.0713866551717122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,float16,0,1.6997493108113606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,fp8,0,0.8803893725077311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,fp8,0,1.7173706690470378
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,fp8,0,0.8096426328023275
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,float16,0,0.8838079770406088
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,float16,0,1.0577867031097412
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,fp8,0,0.874895970026652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,float16,0,0.8760106563568115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,fp8,0,0.8060106436411539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,float16,0,0.8813866774241129
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,fp8,0,0.8152106602986654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,fp8,0,0.46592533588409424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,float16,0,0.5569653511047363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,float16,0,0.4729493459065755
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,fp8,0,0.4309920072555542
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,fp8,0,0.4277546803156535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,float16,0,0.47440000375111896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,float16,0,0.5032480160395304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,float16,0,0.47375468413035077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,fp8,0,0.504309336344401
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,float16,0,0.28064533074696857
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,fp8,0,0.25782932837804157
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,fp8,0,0.25244800249735516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,float16,0,0.30451200405756634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,fp8,0,0.2590346733729045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,float16,0,0.27002133925755817
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,float16,0,0.2797066569328308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,fp8,0,0.25734400749206543
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,float16,0,3.1505654652913413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,fp8,0,2.905914624532064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,fp8,0,3.0576321283976235
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,float16,0,3.154197374979655
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,float16,0,3.9598239262898765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,fp8,0,0.4587893486022949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,float16,0,0.2995413343111674
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,fp8,0,0.24206932385762533
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,fp8,0,2.9064480463663735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,float16,0,1.6245919863382976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,float16,0,3.167125384012858
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,fp8,0,1.6347626050313313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,fp8,0,1.4722293217976887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,float16,0,1.9500266710917156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,fp8,0,1.5741333961486816
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,fp8,0,1.4866560300191243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,float16,0,1.9660587310791016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,fp8,0,1.5739413897196453
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,fp8,0,0.7596373558044434
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,float16,0,0.8921706676483154
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,fp8,0,0.876693328221639
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,float16,0,0.8970186710357666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,fp8,0,0.8063519795735677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,float16,0,0.9897173245747884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,fp8,0,0.7650612990061442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,float16,0,0.8304800192515055
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,fp8,0,0.8063573042551676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,fp8,0,2.9089600245157876
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,float16,0,0.47094400723775226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,fp8,0,0.4246559937795003
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,float16,0,0.46513601144154865
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,fp8,0,0.4240320126215617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,float16,0,0.5062933365503947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,fp8,0,0.4195893208185832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,float16,0,0.4299253225326538
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,fp8,0,0.4232853253682454
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,float16,0,1.6463093757629395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,float16,0,0.4310026566187541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,fp8,0,0.4166293144226074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,float16,0,0.2690346638361613
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,fp8,0,0.233189324537913
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,float16,0,0.27172799905141193
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,float16,0,0.8769120375315348
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,float16,0,0.25845867395401
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,fp8,0,0.2201919953028361
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,fp8,0,0.23181867599487305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,fp8,0,0.2333866755167643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,float16,0,0.2677866617838542
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,fp8,0,0.21580266952514648
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,fp8,0,0.13755200306574503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,fp8,0,0.1373599966367086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,fp8,0,0.12878400087356567
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,float16,0,0.15081600348154703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,float16,0,0.15306666493415833
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,fp8,0,0.13739200433095297
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,float16,0,1.5948425928751628
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,float16,0,0.137855996688207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,fp8,0,0.1370186706384023
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,float16,0,1.8885706265767415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,fp8,0,1.743925412495931
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,float16,0,1.8893973032633464
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,fp8,0,1.7451732953389485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,float16,0,1.8916053771972656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,float16,0,0.23760533332824707
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,float16,0,0.14152000347773233
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,fp8,0,1.74836270014445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,float16,0,0.1492639978726705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,fp8,0,0.9382186730702718
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,float16,0,0.9615253607432047
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,float16,0,0.9795786539713541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,fp8,0,0.894490639368693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,fp8,0,0.88755202293396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,float16,0,1.1016480127970378
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,float16,0,0.9644479751586914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,fp8,0,0.889695962270101
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,fp8,0,0.4896426598230998
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,float16,0,0.5327626864115397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,fp8,0,0.46267199516296387
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,float16,0,0.5119359890619913
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,float16,0,0.5490719874699911
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,fp8,0,0.487770676612854
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,float16,0,0.49701865514119464
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,fp8,0,0.4900960127512614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,float16,0,1.8956373532613118
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,float16,0,0.49836798508961994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,fp8,0,0.45947198073069256
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,float16,0,0.3184906641642253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,float16,0,0.29254400730133057
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,float16,0,0.9598879814147949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,fp8,0,0.261898676554362
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,fp8,0,0.9450026353200277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,float16,0,0.2905919949213664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,float16,0,0.26665065685908
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,float16,0,0.1671253244082133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,fp8,0,0.1426400045553843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,float16,0,0.17166399955749512
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,fp8,0,0.14812800288200378
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,float16,0,0.16378133495648703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,fp8,0,0.14575466513633728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,float16,0,0.16620799899101257
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,fp8,0,0.14572800199190775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,fp8,0,1.7463572820027669
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,float16,0,0.16729066769282022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,fp8,0,0.14626666903495789
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,float16,0,0.09938133756319682
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,fp8,0,0.2631946603457133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,fp8,0,0.2489173412322998
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,fp8,0,0.08928533395131429
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,fp8,0,0.09074133634567261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,float16,0,0.09874133268992107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,fp8,0,0.08885866403579712
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,fp8,0,0.09041066964467366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,fp8,0,0.2635519901911418
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,fp8,0,0.26081599791844684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,fp8,0,0.08845866719881694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,float16,0,1.8652373949686687
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,fp8,0,1.7262239456176758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,float16,0,1.969194730122884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,fp8,0,1.833898703257243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,float16,0,1.869157314300537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,fp8,0,1.7280853589375813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,float16,0,0.09942932923634847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,float16,0,0.09727467099825542
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,float16,0,1.962213357289632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,fp8,0,1.7313547134399414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,float16,0,0.9435306390126547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,float16,0,0.969210704167684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,float16,0,1.1375679969787598
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,fp8,0,0.8739466667175293
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,float16,0,1.1670880317687988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,fp8,0,0.9265013535817465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,float16,0,0.9496906598409017
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,fp8,0,0.9275893370310465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,float16,0,0.2660106619199117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,float16,0,0.5149493217468262
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,fp8,0,0.4782826503117879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,float16,0,0.5873333215713501
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,fp8,0,0.44812266031901044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,float16,0,0.09018666545550029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,fp8,0,0.4756853183110555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,float16,0,0.48852264881134033
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,float16,0,0.48649601141611737
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,fp8,0,0.449130654335022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,fp8,0,0.9247679710388184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,float16,0,0.4873493512471517
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,float16,0,0.25539199511210126
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,fp8,0,0.2537226676940918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,float16,0,0.2898613413174947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,fp8,0,0.23596266905466715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,fp8,0,0.24097599585851034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,float16,0,0.26396799087524414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,fp8,0,0.25144533316294354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,float16,0,0.25675733884175617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,float16,0,0.25730667511622113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,fp8,0,0.23690666755040488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,float16,0,0.14496533075968424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,fp8,0,0.13851733009020487
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,fp8,0,0.13179199894269308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,fp8,0,0.13699199755986533
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,float16,0,0.1567093332608541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,float16,0,0.1521013379096985
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,float16,0,0.1607093314329783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,float16,0,0.14340266585350037
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,fp8,0,0.13922666509946188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,float16,0,0.09299733241399129
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,float16,0,0.09123733639717102
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,fp8,0,0.08202666540940602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,fp8,0,0.07682133217652638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,float16,0,0.09145599603652954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,fp8,0,0.08132266501585643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,float16,0,0.08261333405971527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,fp8,0,0.08065600196520488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,float16,0,0.05903466542561849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,fp8,0,0.88591996828715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,float16,0,0.0543093333641688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,fp8,0,0.04971200227737427
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,float16,0,0.057573333382606506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,fp8,0,0.45324798425038654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,fp8,0,0.05333333214124044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,float16,0,0.05949333310127258
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,fp8,0,0.05194133520126343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,float16,0,0.05875200033187866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,fp8,0,0.04972266654173533
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,fp8,0,0.13013333082199097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,fp8,0,1.135696013768514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,float16,0,1.162501335144043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,float16,0,1.2788426876068115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,fp8,0,1.0775573253631592
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,float16,0,0.08710400263468425
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,float16,0,1.1656533082326253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,fp8,0,1.0796000162760417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,float16,0,1.1702346801757812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,fp8,0,0.05239466826121012
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,fp8,0,0.5558613141377767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,fp8,0,1.1666293144226074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,fp8,0,0.5776960055033366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,float16,0,0.6088320016860962
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,float16,0,0.7072052955627441
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,fp8,0,0.5758399963378906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,float16,0,0.5968533356984457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,fp8,0,0.5799253384272257
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,fp8,0,0.5812373161315918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,float16,0,0.595850666364034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,float16,0,0.34849599997202557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,fp8,0,0.28332799673080444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,float16,0,0.3238240083058675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,float16,0,0.35259731610616046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,fp8,0,0.2836640079816182
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,float16,0,0.31568533182144165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,fp8,0,0.2884053389231364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,fp8,0,0.08091733356316884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,float16,0,0.18571732441584268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,fp8,0,0.3027786612510681
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,fp8,0,0.15446399648984274
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,fp8,0,0.16375999649365744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,float16,0,0.1909066637357076
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,fp8,0,0.16356799999872842
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,float16,0,0.16634133458137512
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,fp8,0,0.16176533699035645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,float16,0,0.165583997964859
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,float16,0,0.6939360300699869
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,fp8,0,0.1523360013961792
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,float16,0,0.1030453344186147
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,float16,0,0.10487999518712361
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,float16,0,0.10735999544461568
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,fp8,0,0.09217066566149394
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,fp8,0,0.09220266342163086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,fp8,0,0.09264000256856282
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,float16,0,0.10157333811124165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,float16,0,0.10136000315348308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,fp8,0,0.0851093331972758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,float16,0,0.056032001972198486
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,fp8,0,0.05555200080076853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,fp8,0,0.05209066470464071
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,float16,0,0.06391466657320659
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,float16,0,0.06043733159701029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,float16,0,0.05621333420276642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,fp8,0,0.3006880084673564
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,fp8,0,0.05453866720199585
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,float16,0,0.056176001826922096
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,float16,0,0.30828799804051715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,float16,0,0.18088533480962118
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,fp8,0,0.03504000107447306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,fp8,0,0.03754133234421412
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,fp8,0,0.03772266705830892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,float16,0,0.04127999891837438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,fp8,0,0.035349334279696144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,fp8,0,0.08455466230710347
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,float16,0,0.037765334049860634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,fp8,0,0.03522133330504099
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,fp8,0,0.05171200136343638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,float16,0,1.4076959292093914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,fp8,0,0.05598400036493937
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,fp8,0,1.1361760298411052
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,float16,0,0.04168533285458883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,float16,0,0.04168533285458883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,float16,0,0.04144533226887385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,float16,0,1.2243786652882893
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,fp8,0,1.1867146492004395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,fp8,0,1.1393919785817463
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,float16,0,1.2325226465861003
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,fp8,0,1.1373013655344646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,float16,0,0.6381920178731283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,float16,0,1.22653333346049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,float16,0,0.6198879877726237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,fp8,0,0.5840160051981608
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,float16,0,0.7240533034006754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,fp8,0,0.6009866793950399
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,fp8,0,0.575925350189209
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,fp8,0,0.5791253248850504
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,float16,0,0.6221226851145426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,fp8,0,0.5762346585591634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,float16,0,0.3192853331565857
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,float16,0,0.32527466615041095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,float16,0,0.6249226729075114
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,float16,0,0.35824533303578693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,fp8,0,0.31163734197616577
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,float16,0,0.3203199903170268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,fp8,0,0.29554132620493573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,float16,0,0.3222293257713318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,fp8,0,0.30988800525665283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,float16,0,0.2019360065460205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,fp8,0,0.16528000434239706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,float16,0,0.18435200055440268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,fp8,0,0.1634773313999176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,float16,0,0.18768533070882162
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,float16,0,0.1861600081125895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,fp8,0,0.16749332348505655
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,fp8,0,0.16408000389734903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,float16,0,0.09650133053461711
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,fp8,0,0.08665066957473755
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,float16,0,0.09684266646703084
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,float16,0,0.09499200185139973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,fp8,0,0.09286933143933614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,fp8,0,0.08562666177749634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,float16,0,0.09296533465385437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,float16,0,0.09971200426419576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,fp8,0,0.08708799878756206
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,float16,0,0.05568000177542368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,fp8,0,0.2962719996770223
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,fp8,0,0.050437331199645996
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,fp8,0,0.05384000142415365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,float16,0,0.0581279993057251
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,fp8,0,0.04977599779764811
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,float16,0,0.05412266651789347
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,float16,0,0.05845866600672404
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,fp8,0,0.052970667680104576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,fp8,0,0.3099413315455119
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,float16,0,0.054986665646235146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,fp8,0,0.04979733129342397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,float16,0,0.03882133215665817
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,fp8,0,0.035002666215101876
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,float16,0,0.03547733277082443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,fp8,0,0.03489600121974945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,fp8,0,0.03443733354409536
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,float16,0,0.03913066784540812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,fp8,0,0.03145066648721695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,float16,0,0.037621334195137024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,fp8,0,0.15634133418401083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,fp8,0,0.034490667283535004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,float16,0,0.023946667710940044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,fp8,0,0.02128000060717265
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,float16,0,0.17174933354059854
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,float16,0,0.02499733368555705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,fp8,0,0.022319999833901722
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,float16,0,0.02364266663789749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,fp8,0,0.021344001094500225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,float16,0,0.02306666721900304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,fp8,0,0.021530665457248688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,fp8,0,0.08561066786448161
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,float16,0,0.9804533322652181
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,fp8,0,0.8517653147379557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,float16,0,0.986080010732015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,fp8,0,0.8510719935099283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,float16,0,0.037632000943024956
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,float16,0,0.023823998868465424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,fp8,0,0.020970667401949566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,float16,0,0.9951519966125488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,fp8,0,0.8564533392588297
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,float16,0,0.4721120198567708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,float16,0,0.45977067947387695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,fp8,0,0.42998401323954266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,float16,0,0.9138133525848389
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,fp8,0,0.8610400358835856
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,fp8,0,0.4261813163757324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,float16,0,0.4609066645304362
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,float16,0,0.4612373510996501
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,fp8,0,0.4378133217493693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,fp8,0,0.4266293446222941
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,float16,0,0.2507466673851013
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,fp8,0,0.22185067335764566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,fp8,0,0.22719999154408774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,float16,0,0.4634773333867391
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,float16,0,0.2539626757303874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,fp8,0,0.219866673151652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,float16,0,0.251525342464447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,fp8,0,0.43751466274261475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,fp8,0,0.22075732549031576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,float16,0,0.24765866994857788
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,float16,0,0.13012799620628357
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,fp8,0,0.11839466293652852
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,float16,0,0.12796266873677573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,fp8,0,0.21996267636617026
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,float16,0,0.13583999872207642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,fp8,0,0.12230400244394939
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,fp8,0,0.12170132994651794
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,float16,0,0.12732266386349997
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,fp8,0,0.11664000153541565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,float16,0,0.07236266632874806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,float16,0,0.2436586618423462
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,fp8,0,0.1216373344262441
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,float16,0,0.07351999978224437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,fp8,0,0.06433600187301636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,fp8,0,0.06612800061702728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,fp8,0,0.06654933094978333
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,float16,0,0.07649600009123485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,fp8,0,0.0662773350874583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,float16,0,0.07644266883532207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,float16,0,0.04342933495839437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,float16,0,0.07315200070540111
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,fp8,0,0.03757333258787791
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,float16,0,0.04373333354791006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,fp8,0,0.038917332887649536
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,fp8,0,0.03939199944337209
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,float16,0,0.04778666794300079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,fp8,0,0.03754133234421412
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,float16,0,0.043749332427978516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,float16,0,0.04347200194994608
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,float16,0,0.13485866785049438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,fp8,0,0.03730666637420654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,fp8,0,0.025205334027608235
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,float16,0,0.027493332823117573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,fp8,0,0.025301332275072735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,fp8,0,0.025386666258176167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,float16,0,0.026719999810059864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,float16,0,0.026714667677879333
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,float16,0,0.02738133321205775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,fp8,0,0.02536533276240031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,float16,0,0.01718933383623759
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,float16,0,0.017029333859682083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,float16,0,0.0173333336909612
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,fp8,0,0.016858667135238647
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,float16,0,0.017152000218629837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,fp8,0,0.01659199967980385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,fp8,0,0.016106666376193363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,float16,0,0.016805333395799
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,float16,0,0.01670933390657107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,fp8,0,0.015103999525308609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,float16,0,0.017103999853134155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,float16,0,0.016927999754746754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,fp8,0,0.014959999670584997
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,float16,0,0.01676799977819125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,fp8,0,0.06420266628265381
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,fp8,0,0.016042667130629223
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,float16,0,0.017050666113694508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,float16,0,0.027744000156720478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,float16,0,0.37540264924367267
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,fp8,0,0.025279998779296875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,fp8,0,0.34918399651845294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,float16,0,0.37135998407999676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,fp8,0,0.01664000004529953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,fp8,0,0.3504319985707601
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,fp8,0,0.015306666493415833
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,float16,0,0.3691200017929077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,float16,0,0.19096000989278158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,fp8,0,0.17770665884017944
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,fp8,0,0.349178671836853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,float16,0,0.196015993754069
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,float16,0,0.37756800651550293
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,fp8,0,0.344810684521993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,fp8,0,0.17713600397109985
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,float16,0,0.19151999553044638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,fp8,0,0.17756267388661703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,float16,0,0.19324266910552979
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,fp8,0,0.18059200048446655
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,float16,0,0.1011199951171875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,float16,0,0.10497066378593445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,float16,0,0.19988266626993814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,fp8,0,0.09594666957855225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,float16,0,0.10281067093213399
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,fp8,0,0.0927946666876475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,float16,0,0.10445866982142131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,fp8,0,0.09697600205739339
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,fp8,0,0.09475200374921162
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,float16,0,0.10540800293286641
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,float16,0,0.057989334066708885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,fp8,0,0.050330668687820435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,float16,0,0.0580266664425532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,fp8,0,0.052970667680104576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,float16,0,0.05609600245952606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,float16,0,0.05593066910902659
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,fp8,0,0.052015999952952065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,float16,0,0.0334346666932106
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,float16,0,0.05762133498986562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,fp8,0,0.054005334774653115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,fp8,0,0.03140799949566523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,float16,0,0.03623999903599421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,float16,0,0.03356799980004629
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,fp8,0,0.03159466634194056
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,fp8,0,0.03148799886306127
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,float16,0,0.03557866563399633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,fp8,0,0.03129599988460541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,fp8,0,0.09501333038012187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,float16,0,0.021087999145189922
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,fp8,0,0.18078933159510294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,float16,0,0.021114667256673176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,float16,0,0.021183999876181286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,fp8,0,0.019274666905403137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,fp8,0,0.019167999426523846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,fp8,0,0.019082666685183842
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,float16,0,0.0198186660806338
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,fp8,0,0.019050666441520054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,float16,0,0.014629332969586054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,fp8,0,0.049914668003718056
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,float16,0,0.013605333864688873
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,float16,0,0.012986666212479273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,float16,0,0.013194666554530462
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,float16,0,0.012965332716703415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,fp8,0,0.031471999982992806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,float16,0,0.012975999464591345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,float16,0,0.012960000584522883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,fp8,0,0.012730666746695837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,fp8,0,0.019445333629846573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,float16,0,0.01293333371480306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,float16,0,0.021141332884629566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,float16,0,0.012954667210578918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,float16,0,0.012901333471139273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,float16,0,0.012938667088747025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,float16,0,0.013088000317414602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,float16,0,0.014277332772811254
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,float16,0,0.03531199942032496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,float16,0,0.01313599944114685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,float16,0,0.23706134160359701
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,fp8,0,0.22708266973495483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,float16,0,0.23810132344563803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,fp8,0,0.22643733024597168
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,float16,0,0.23801066478093466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,float16,0,0.23674132426579794
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,float16,0,0.12773332993189493
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,float16,0,0.1386186679204305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,fp8,0,0.11914666493733723
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,fp8,0,0.22539732853571573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,fp8,0,0.11874133348464966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,float16,0,0.12685333689053854
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,fp8,0,0.11741333206494649
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,float16,0,0.013104000439246496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,fp8,0,0.11718933780988057
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,float16,0,0.12426132957140605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,float16,0,0.07389333347479503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,fp8,0,0.06358399987220764
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,float16,0,0.0690826674302419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,float16,0,0.12773866454760233
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,fp8,0,0.11904533704121907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,float16,0,0.06618666648864746
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,fp8,0,0.06230400005976359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,float16,0,0.06821866830190022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,fp8,0,0.06237333516279856
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,fp8,0,0.0620959997177124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,fp8,0,0.22570133209228516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,float16,0,0.03697066754102707
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,float16,0,0.06837333242098491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,fp8,0,0.06304533282915752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,fp8,0,0.033573334415753685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,fp8,0,0.033759998778502144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,fp8,0,0.033573334415753685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,fp8,0,0.035487999518712364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,float16,0,0.03763733307520548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,float16,0,0.03762666632731756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,fp8,0,0.03345600018898646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,float16,0,0.02334933231274287
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,float16,0,0.023344000180562336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,fp8,0,0.022831998765468597
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,float16,0,0.022976001103719074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,fp8,0,0.023258666197458904
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,float16,0,0.023402666052182514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,fp8,0,0.021914665897687275
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,float16,0,0.023045333723227184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,fp8,0,0.02294933299223582
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,float16,0,0.014991999914248785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,float16,0,0.014783999572197596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,float16,0,0.014922666052977243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,float16,0,0.014943999548753103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,fp8,0,0.014805333067973455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,fp8,0,0.014741333822409311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,float16,0,0.014864000181357065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,float16,0,0.010821333775917688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,float16,0,0.040847999354203544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,fp8,0,0.010762666662534079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,float16,0,0.010949333508809408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,float16,0,0.011007999380429586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,float16,0,0.01081066702802976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,fp8,0,0.010847999403874079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,float16,0,0.010757333288590113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,fp8,0,0.010874666273593903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,float16,0,0.03770133356253306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,float16,0,0.010794666906197866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,fp8,0,0.010672000547250112
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,float16,0,0.010677333921194077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,float16,0,0.01109333336353302
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,float16,0,0.010928000013033548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,fp8,0,0.010480000327030817
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,float16,0,0.011002667248249054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,float16,0,0.010640000303586325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,float16,0,0.009984000275532404
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,fp8,0,0.010298666854699453
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,fp8,0,0.009061333412925402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,float16,0,0.010757333288590113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,float16,0,0.010826667149861654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,float16,0,0.2016106645266215
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,fp8,0,0.1911840041478475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,fp8,0,0.19260267416636148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,float16,0,0.20139199495315552
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,fp8,0,0.19114667177200317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,fp8,0,0.02317333221435547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,fp8,0,0.19113065799077353
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,float16,0,0.20131200551986694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,float16,0,0.010128000130256018
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,float16,0,0.10860266288121541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,float16,0,0.10773332913716634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,fp8,0,0.10109866658846538
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,fp8,0,0.09902933239936829
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,float16,0,0.10538132985432942
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,fp8,0,0.10106666882832845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,float16,0,0.10731732845306396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,fp8,0,0.10094933708508809
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,float16,0,0.058037335673967995
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,fp8,0,0.054133335749308266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,fp8,0,0.0525546669960022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,float16,0,0.2010186711947123
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,float16,0,0.05932799975077311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,fp8,0,0.0537066658337911
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,float16,0,0.10731732845306396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,float16,0,0.058778668443361916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,fp8,0,0.10109333197275798
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,float16,0,0.058277333776156105
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,float16,0,0.033471999069054924
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,fp8,0,0.05378133555253347
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,float16,0,0.033520000676314034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,fp8,0,0.05385066568851471
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,fp8,0,0.029509333272775013
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,fp8,0,0.029530666768550873
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,float16,0,0.03142933299144109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,float16,0,0.05892266829808553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,fp8,0,0.02939733366171519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,float16,0,0.03169066707293192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,fp8,0,0.029696000119050343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,float16,0,0.021040000021457672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,float16,0,0.021013334393501282
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,fp8,0,0.019285333653291065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,fp8,0,0.01887999971707662
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,fp8,0,0.019152000546455383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,float16,0,0.020938667158285778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,float16,0,0.02089066555102666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,fp8,0,0.01904533306757609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,float16,0,0.012965332716703415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,float16,0,0.013007999708255133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,float16,0,0.013082666943470636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,float16,0,0.013354666531085968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,float16,0,0.010693332801262537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,fp8,0,0.009141333401203156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,float16,0,0.0107893335322539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,float16,0,0.010874666273593903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,float16,0,0.03321066747109095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,fp8,0,0.009290666629870733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,fp8,0,0.030159999926884968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,float16,0,0.01101333275437355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,float16,0,0.010805333654085795
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,fp8,0,0.00922133338948091
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,fp8,0,0.010490667074918747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,float16,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,float16,0,0.009557333464423815
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,float16,0,0.020981334149837494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,float16,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,float16,0,0.009359999870260557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,fp8,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,float16,0,0.009226666763424873
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,float16,0,0.009136000027259191
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,float16,0,0.014458666245142618
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,float16,0,0.009226666763424873
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,float16,0,0.1864266594250997
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,fp8,0,0.17275200287501016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,fp8,0,0.1725920041402181
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,float16,0,0.18326934178670248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,float16,0,0.009946666657924652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,fp8,0,0.1727893352508545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,float16,0,0.18092266718546549
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,float16,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,float16,0,0.09783466657002766
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,fp8,0,0.17246933778127035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,fp8,0,0.008656000097592672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,fp8,0,0.09087466200192769
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,fp8,0,0.0906986693541209
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,float16,0,0.09485866626103719
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,float16,0,0.0969546635945638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,fp8,0,0.09251733620961507
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,fp8,0,0.09009066224098206
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,fp8,0,0.09037866195042928
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,float16,0,0.05385066568851471
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,float16,0,0.053818667928377785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,fp8,0,0.04875733455022176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,fp8,0,0.04898133377234141
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,float16,0,0.05273066461086273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,fp8,0,0.048170665899912514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,float16,0,0.053157334526379905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,float16,0,0.05180799961090088
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,fp8,0,0.047770669062932335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,fp8,0,0.04926399886608124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,float16,0,0.03092266619205475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,float16,0,0.03146666785081228
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,float16,0,0.03012266755104065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,fp8,0,0.02737066646416982
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,float16,0,0.18523732821146646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,fp8,0,0.027109332382678986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,fp8,0,0.027285332481066387
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,float16,0,0.03146133323510488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,float16,0,0.029994666576385498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,fp8,0,0.029322666426499683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,fp8,0,0.019109333554903667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,float16,0,0.09875200192133586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,float16,0,0.019199999670187633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,float16,0,0.01915733392039935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,fp8,0,0.018911999960740406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,float16,0,0.020517333100239437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,fp8,0,0.018965333700180054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,float16,0,0.019173332800467808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,float16,0,0.019002666076024372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,fp8,0,0.019178666174411774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,float16,0,0.012725333372751871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,float16,0,0.012981332838535309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,float16,0,0.012981332838535309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,float16,0,0.013056000073750814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,float16,0,0.013023999830087027
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,float16,0,0.010053333515922228
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,float16,0,0.010591999938090643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,fp8,0,0.009258666386206945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,fp8,0,0.027349332968393963
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,float16,0,0.010368000095089277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,float16,0,0.009248000259200731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,float16,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,fp8,0,0.01883200059334437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,float16,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,fp8,0,0.008832000195980072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,float16,0,0.008832000195980072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,float16,0,0.009109333157539368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,float16,0,0.09730133414268494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,float16,0,0.008623999853928884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,float16,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,float16,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,float16,0,0.008650666723648706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,fp8,0,0.008799999952316284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,float16,0,0.010319999729593595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,float16,0,0.17749333381652832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,fp8,0,0.1670773426691691
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,float16,0,0.17781333128611246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,fp8,0,0.16639467080434164
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,fp8,0,0.16805867354075113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,float16,0,0.1783413290977478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,float16,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,fp8,0,0.16633599996566772
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,float16,0,0.09636800487836202
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,float16,0,0.09524800380071004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,fp8,0,0.08688533306121826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,fp8,0,0.08854400118192036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,fp8,0,0.08672533432642619
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,float16,0,0.09790933132171631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,fp8,0,0.08680533369382222
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,float16,0,0.09829333424568176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,float16,0,0.09403199950853984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,fp8,0,0.08878399928410848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,float16,0,0.010879999647537867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,float16,0,0.05356800059477488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,fp8,0,0.047695999344189964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,float16,0,0.05322133501370748
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,float16,0,0.05231466889381409
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,fp8,0,0.04823466638724009
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,fp8,0,0.04794133206208547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,float16,0,0.05156800150871277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,fp8,0,0.04753066599369049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,float16,0,0.05190933247407278
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,float16,0,0.03124266614516576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,fp8,0,0.027461332579453785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,float16,0,0.17782400051752725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,float16,0,0.03054933249950409
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,fp8,0,0.027471999327341717
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,float16,0,0.031061333914597828
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,float16,0,0.03049066662788391
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,fp8,0,0.027093333502610523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,float16,0,0.01915733392039935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,fp8,0,0.018906666586796444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,float16,0,0.03140799949566523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,float16,0,0.018933333456516266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,fp8,0,0.018965333700180054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,fp8,0,0.018816000471512478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,float16,0,0.019551999866962433
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,fp8,0,0.01740266631046931
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,float16,0,0.01926933353145917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,fp8,0,0.018885333091020584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,float16,0,0.019173332800467808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,float16,0,0.013072000195582708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,float16,0,0.013253333667914072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,float16,0,0.013061333447694778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,float16,0,0.012986666212479273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,float16,0,0.010597333312034607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,float16,0,0.010858666151762009
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,fp8,0,0.009381333366036415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,fp8,0,0.047744000951449074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,float16,0,0.010682666053374609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,float16,0,0.010575999816258749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,float16,0,0.010751999914646149
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,fp8,0,0.027349332968393963
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,fp8,0,0.009279999881982803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,float16,0,0.009285333255926767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,fp8,0,0.027162666122118633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,float16,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,float16,0,0.009077333534757296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,float16,0,0.008789333204428354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,fp8,0,0.008805333326260248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,float16,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,fp8,0,0.008805333326260248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,float16,0,0.012752000242471695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,fp8,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,float16,0,0.0086666668454806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,fp8,0,0.009290666629870733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,float16,0,0.00921066664159298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,fp8,0,0.009061333412925402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,float16,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,fp8,0,0.008832000195980072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,1,128,1,float16,float16,0,0.012650666137536367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,1,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,0,0.014783999572197596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,0,0.016864000509182613
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,0,0.014912000546852747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,0,0.02733866622050603
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,0,0.023269332945346832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,float16,0,0.0086666668454806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,1,128,1,float16,float16,0,0.008837333569924036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,float16,0,0.05348266661167145
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,0,0.010645333677530289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,0,0.0120319997270902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,0,0.01718933383623759
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,1,128,1,float16,float16,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,1,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,float16,0,0.027109332382678986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,0,0.008826666822036108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,fp8,0,0.023221333821614582
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,0,0.007231999809543292
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,0,0.010687999427318573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,0,0.01099733387430509
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,1,128,1,float16,fp8,0,0.008576000109314919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,1,128,1,float16,float16,0,0.008623999853928884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,0,0.008565333361426989
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,float16,0,0.008634666601816813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,0,0.00860799973209699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,1,128,1,float16,float16,0,0.00877333308259646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,fp8,0,0.039408000806967415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,1,128,1,float16,fp8,0,0.00850133349498113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,0,0.007333333293596904
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,0,0.006735999758044879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,0,0.0069759997228781385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,0,0.008592000231146812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,0,0.006906666482488315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,1,128,1,float16,float16,0,0.008576000109314919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,1,128,1,float16,fp8,0,0.008623999853928884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,fp8,0,0.008565333361426989
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,0,0.0068853336075941724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,0,0.006837333242098491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,0,0.006773333375652631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,0,0.006618666773041089
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,0,0.007029333462317784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,float16,0,0.016949333250522614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,1,128,1,float16,float16,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,fp8,0,0.006954666847983996
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,0,0.0069440001000960665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,0,0.006954666847983996
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,float16,0,0.008576000109314919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,1,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,0,0.007007999966541926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,0,0.0068800002336502075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,0,0.00707733320693175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,0,0.006682666639486949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,0,0.006575999781489372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,1,128,1,float16,float16,0,0.008703999842206636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,float16,0,0.012842666357755661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,1,128,1,float16,fp8,0,0.008645333349704742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,fp8,0,0.006949333474040031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,float16,0,0.0068693334857622785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,0,0.006890666360656421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,0,0.006629333520929019
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,0,0.0068800002336502075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,0,0.006613333399097125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,0,0.006506666541099548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,1,128,1,float16,fp8,0,0.007989333321650824
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,0,0.007018666714429855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,0,0.006784000123540561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,0,0.006831999868154526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,0,0.006698666761318843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,0,0.006864000111818314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,0,0.006677333265542984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,float16,0,0.006864000111818314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,float16,0,0.008805333326260248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,fp8,0,7.904821395874023
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,float16,0,8.256682713826498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,fp8,0,8.009893417358398
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,float16,0,8.672837575276693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,float16,0,8.264202753702799
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,float16,0,4.203349431355794
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,fp8,0,4.074325243631999
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,float16,0,5.475018819173177
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,fp8,0,4.081573486328125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,fp8,0,7.626735687255859
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,float16,0,8.263546625773111
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,fp8,0,8.0153439839681
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,fp8,0,3.870997428894043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,float16,0,5.681466420491536
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,float16,0,4.330602645874023
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,fp8,0,2.1228639284769693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,float16,0,4.197466532389323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,fp8,0,4.450768152872722
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,float16,0,2.3036425908406577
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,float16,0,2.3030773798624673
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,fp8,0,2.4054293632507324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,float16,0,2.627562681833903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,fp8,0,1.9950079917907715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,fp8,0,2.019909381866455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,float16,0,2.620405356089274
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,float16,0,2.341818650563558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,fp8,0,2.059178670247396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,float16,0,1.2560213406880696
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,float16,0,1.2188426653544109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,fp8,0,1.1262719631195068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,fp8,0,1.1774933338165283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,float16,0,1.3754879633585613
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,fp8,0,1.2531893253326416
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,float16,0,1.2181119918823242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,fp8,0,1.1173333326975505
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,float16,0,1.215135971705119
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,fp8,0,3.8819147745768228
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,fp8,0,4.635802586873372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,float16,0,4.8802188237508135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,fp8,0,4.931071917215983
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,fp8,0,4.413829485575358
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,float16,0,6.257162729899089
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,float16,0,5.683194478352864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,fp8,0,1.0617760022481282
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,fp8,0,2.2813013394673667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,float16,0,2.444693406422933
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,float16,0,3.000783920288086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,float16,0,6.081706364949544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,fp8,0,2.8491732279459634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,float16,0,3.1474987665812173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,fp8,0,4.69489065806071
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,fp8,0,2.2524266242980957
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,float16,0,1.2862186431884766
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,fp8,0,2.4001439412434897
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,float16,0,2.978303909301758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,float16,0,1.5530133247375488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,float16,0,2.964853286743164
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,fp8,0,1.4114559491475422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,fp8,0,2.658682664235433
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,fp8,0,1.2476906776428223
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,fp8,0,1.1705866654713948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,float16,0,1.530687967936198
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,float16,0,1.650485356648763
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,fp8,0,0.6614453395207723
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,float16,0,1.5435946782430012
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,fp8,0,1.3310453097025554
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,fp8,0,0.7436853249867758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,fp8,0,0.6715839703877767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,float16,0,0.8443253040313721
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,float16,0,0.7435146967569987
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,fp8,0,0.6939786275227865
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,float16,0,0.7256800333658854
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,fp8,0,0.6736799875895182
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,fp8,0,3.173551877339681
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,float16,0,4.145450592041016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,float16,0,4.451381365458171
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,fp8,0,3.3740905125935874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,float16,0,3.8624267578125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,fp8,0,1.2488000392913818
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,float16,0,0.7427946726481119
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,float16,0,0.7284853458404541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,fp8,0,3.1400585174560547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,float16,0,1.8931360244750977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,float16,0,1.7395200729370117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,fp8,0,3.1386613845825195
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,fp8,0,1.7118186950683594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,float16,0,3.7146132787068686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,float16,0,1.8101280530293782
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,fp8,0,1.699445406595866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,float16,0,1.7436480522155762
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,fp8,0,1.6080692609151204
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,fp8,0,1.7169013023376465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,float16,0,1.7649067242940266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,fp8,0,0.899242639541626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,float16,0,1.0868159929911296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,fp8,0,0.9029333591461182
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,float16,0,0.9308319886525472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,fp8,0,0.9151679674784342
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,fp8,0,0.8987627029418945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,float16,0,1.0884586970011394
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,fp8,0,0.897711992263794
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,float16,0,0.5034933487574259
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,float16,0,0.5749919811884562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,fp8,0,0.49134401480356854
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,float16,0,0.5328373511632284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,fp8,0,0.49116798241933185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,fp8,0,1.669808069864909
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,float16,0,0.5481866598129272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,fp8,0,0.4906239906946818
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,float16,0,0.5010720094045004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,float16,0,0.9156959851582845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,float16,0,4.570042610168457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,float16,0,4.397712071736653
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,fp8,0,4.367168108622233
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,fp8,0,0.4896586736043294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,float16,0,4.609408060709636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,fp8,0,0.45970133940378827
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,float16,0,2.245936075846354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,fp8,0,4.088042577107747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,float16,0,4.403733253479004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,float16,0,2.236426671346029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,fp8,0,2.096757411956787
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,fp8,0,4.344448089599609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,fp8,0,2.0717013676961265
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,float16,0,0.9634186426798502
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,fp8,0,2.2171093622843423
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,float16,0,2.7351999282836914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,fp8,0,2.1642187436421714
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,float16,0,1.5372533798217773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,float16,0,2.2386347452799478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,float16,0,1.245418628056844
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,fp8,0,1.0703252951304119
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,fp8,0,2.219045321146647
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,fp8,0,1.1431360244750977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,fp8,0,1.0686933199564617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,float16,0,1.3676106135050456
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,fp8,0,1.0700639883677165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,fp8,0,4.426959991455078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,fp8,0,1.1431199709574382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,float16,0,1.4168799718221028
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,float16,0,1.1548799673716228
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,float16,0,0.6136480172475179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,fp8,0,0.6049813429514567
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,float16,0,0.6567253271738688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,fp8,0,0.6416320006052653
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,float16,0,0.6168266534805298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,fp8,0,0.590175986289978
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,float16,0,0.721349318822225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,fp8,0,0.5672426621119181
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,fp8,0,0.6064906517664591
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,float16,0,0.6897173722585043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,float16,0,0.3901866674423218
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,float16,0,0.34594134489695233
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,fp8,0,0.3168320059776306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,float16,0,0.35596267382303876
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,fp8,0,0.3393760124842326
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,float16,0,2.2370932896931968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,fp8,0,0.3434400161107381
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,float16,0,0.34301865100860596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,fp8,0,0.31514134009679157
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,float16,0,2.603882630666097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,fp8,0,2.409125328063965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,float16,0,2.5903894106547036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,fp8,0,2.587674617767334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,float16,0,2.5902613004048667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,float16,0,1.5946027437845867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,float16,0,1.3323573271433513
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,float16,0,2.5949172973632812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,fp8,0,2.4114559491475425
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,fp8,0,0.3381813367207845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,fp8,0,1.3161653677622478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,float16,0,0.3696639935175578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,fp8,0,1.2305333614349365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,float16,0,1.6416160265604656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,float16,0,1.3242346445719402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,fp8,0,1.3154773712158203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,float16,0,1.3615679740905762
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,fp8,0,1.3161546389261882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,float16,0,0.8251307010650635
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,float16,0,0.6950720151265463
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,fp8,0,0.6959520181020101
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,float16,0,0.7380586465199789
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,fp8,0,2.409850597381592
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,float16,0,0.7366666793823242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,fp8,0,0.6902453104654948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,fp8,0,0.6853066285451254
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,float16,0,0.40321600437164307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,float16,0,0.40377600987752277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,fp8,0,1.2356053193410237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,fp8,0,0.34909868240356445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,fp8,0,0.3708000183105469
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,float16,0,0.3754613399505615
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,fp8,0,0.37226665019989014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,fp8,0,0.3697333335876465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,float16,0,0.3757813374201457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,fp8,0,0.37187735239664715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,float16,0,0.23819732666015625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,float16,0,0.22632533311843872
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,fp8,0,0.2009013295173645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,float16,0,0.24359999100367227
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,fp8,0,0.19842666387557983
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,fp8,0,0.6571413278579712
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,fp8,0,0.6857386430104574
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,float16,0,0.23771200577418009
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,float16,0,0.2178773283958435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,fp8,0,0.19873066743214926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,float16,0,0.6918559869130453
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,float16,0,2.630133310953776
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,float16,0,0.42932267983754474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,fp8,0,2.308085282643636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,float16,0,2.4674827257792153
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,fp8,0,2.307055950164795
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,float16,0,2.4699947039286294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,fp8,0,0.21340266863505045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,fp8,0,2.453829288482666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,float16,0,1.2713653246561687
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,fp8,0,1.1792106628417969
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,float16,0,1.2539520263671875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,fp8,0,2.311253388722738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,fp8,0,0.19742933909098306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,fp8,0,1.2251733144124348
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,float16,0,1.2575519879659016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,fp8,0,1.2500533262888591
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,float16,0,1.4665013949076335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,fp8,0,1.1741920312245686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,float16,0,1.341119925181071
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,float16,0,0.6570080121358236
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,float16,0,0.7705492973327637
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,fp8,0,0.6449439922968546
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,fp8,0,0.6089226802190145
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,float16,0,0.6481599807739258
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,fp8,0,0.6458880106608073
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,float16,0,0.7683839797973633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,fp8,0,0.6447626749674479
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,fp8,0,0.6461280186971029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,float16,0,0.6501546700795492
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,float16,0,0.3571679989496867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,fp8,0,0.3243946631749471
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,float16,0,0.39578131834665936
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,fp8,0,0.3384480079015096
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,float16,0,0.36877866586049396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,fp8,0,0.320853332678477
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,float16,0,0.34546132882436115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,fp8,0,0.34384532769521076
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,float16,0,0.34753600756327313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,fp8,0,0.32232532898585003
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,float16,0,0.2206559975941976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,float16,0,2.639370600382487
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,fp8,0,0.17935466766357422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,float16,0,0.2084266742070516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,fp8,0,0.18146665891011557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,fp8,0,0.19568532705307007
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,fp8,0,0.19393066565195718
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,float16,0,0.1952213247617086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,fp8,0,0.17948265870412192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,float16,0,0.1955839991569519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,float16,0,0.12798933188120523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,fp8,0,0.10801600416501363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,float16,0,0.12562132875124613
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,fp8,0,0.10846400260925293
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,float16,0,0.1251573363939921
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,fp8,0,0.10755733648935954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,fp8,0,0.10762666662534077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,float16,0,0.12820266683896384
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,float16,0,0.12503467003504434
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,fp8,0,0.10774933298428853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,fp8,0,1.493567943572998
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,float16,0,1.4949866930643718
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,fp8,0,1.4046346346537273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,float16,0,1.4985440572102864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,fp8,0,1.4061813354492188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,float16,0,0.7642079989115397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,float16,0,1.5026507377624512
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,fp8,0,0.7233440081278483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,float16,0,0.777616024017334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,fp8,0,1.40720001856486
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,float16,0,0.1992853283882141
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,float16,0,0.9001013437906901
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,fp8,0,0.7632693449656168
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,float16,0,0.9283626874287924
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,fp8,0,0.7632373174031576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,fp8,0,1.1721546649932861
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,float16,0,0.7676959832509359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,fp8,0,0.764352003733317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,float16,0,0.4315306742986043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,float16,0,0.4602559804916382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,fp8,0,0.3778826793034871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,fp8,0,0.39797866344451904
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,float16,0,0.4241120020548503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,fp8,0,0.4002346595128377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,float16,0,1.4958826700846355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,float16,0,0.42585599422454834
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,fp8,0,0.4000320037206014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,fp8,0,0.3986133337020874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,float16,0,0.40088534355163574
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,float16,0,0.24228266874949136
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,fp8,0,0.21681066354115805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,float16,0,0.24396799008051553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,fp8,0,0.21767467260360718
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,float16,0,0.23271999756495157
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,fp8,0,0.20561599731445312
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,float16,0,0.23560533920923868
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,fp8,0,0.2042186657587687
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,float16,0,0.23550933599472046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,float16,0,0.13896000385284424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,fp8,0,0.12247467041015625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,float16,0,0.13621333241462708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,float16,0,0.13792533675829569
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,fp8,0,0.11851200461387634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,fp8,0,0.7176746527353922
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,float16,0,0.13426132996877035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,fp8,0,0.11552000045776367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,fp8,0,0.11551466584205627
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,float16,0,0.0881706674893697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,float16,0,0.0883146623770396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,fp8,0,0.07411733269691467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,fp8,0,0.0799786647160848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,float16,0,0.08620267113049825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,fp8,0,0.07647466659545898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,float16,0,0.07994133234024048
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,fp8,0,0.07928533355395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,float16,0,1.4975093205769856
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,fp8,0,1.4979359308878581
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,float16,0,1.548367977142334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,fp8,0,0.21708265940348306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,fp8,0,0.12521066268285116
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,fp8,0,1.4195946057637532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,float16,0,1.502336025238037
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,float16,0,0.12586667140324911
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,fp8,0,1.4979573885599773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,fp8,0,0.07388266424338023
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,float16,0,0.7619199752807617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,fp8,0,1.4215946197509766
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,fp8,0,0.7611733277638754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,fp8,0,0.7279733022054037
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,float16,0,0.7769973278045654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,float16,0,0.7622933387756348
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,fp8,0,0.7212586402893066
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,float16,0,0.7639679908752441
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,fp8,0,0.7223146756490072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,float16,0,0.8062506516774496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,fp8,0,0.7617973486582438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,float16,0,0.4466613531112671
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,fp8,0,0.3936053514480591
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,float16,0,0.40221333503723145
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,fp8,0,0.37637333075205487
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,float16,0,0.45397865772247314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,fp8,0,0.3938773473103841
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,fp8,0,0.3731199900309245
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,float16,0,0.39667733510335285
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,fp8,0,0.3761813243230184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,float16,0,0.21093332767486572
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,fp8,0,0.19876799980799356
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,float16,0,1.5095945994059246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,float16,0,0.2358400026957194
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,float16,0,0.21222400665283203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,fp8,0,0.21196266015370688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,float16,0,0.22430932521820068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,fp8,0,0.1991200049718221
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,fp8,0,0.1207413375377655
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,float16,0,0.13340266545613608
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,float16,0,0.12800000111262003
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,fp8,0,0.11176533500353496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,fp8,0,0.11000000437100728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,float16,0,0.13378133376439413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,float16,0,0.1300159990787506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,fp8,0,0.10976533095041911
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,float16,0,0.12079999844233195
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,fp8,0,0.11767466862996419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,float16,0,0.07795199751853943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,float16,0,0.39538665612538654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,fp8,0,0.07167466481526692
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,float16,0,0.07922133306662242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,fp8,0,0.06654933094978333
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,float16,0,0.07308266560236613
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,fp8,0,0.07201066613197327
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,float16,0,0.07064533233642578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,fp8,0,0.07148799796899159
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,float16,0,0.07213866710662842
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,fp8,0,0.06595733265082042
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,fp8,0,0.21235199769337973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,float16,0,0.048911998669306435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,float16,0,0.04891733328501383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,fp8,0,0.0488373339176178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,float16,0,0.04901866614818573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,fp8,0,0.04576533536116282
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,float16,0,0.08065600196520488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,fp8,0,0.04562133550643921
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,fp8,0,0.048122664292653404
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,fp8,0,0.04470400015513102
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,float16,0,0.049173335234324135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,float16,0,0.9469813505808512
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,fp8,0,0.9478453000386556
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,float16,0,0.9480640093485514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,fp8,0,0.9027199745178223
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,float16,0,0.9497066338857015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,fp8,0,0.947925329208374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,float16,0,0.2150719960530599
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,float16,0,0.5418026844660441
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,fp8,0,0.46591468652089435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,float16,0,0.9538880189259847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,float16,0,0.4963039954503377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,fp8,0,0.46136001745859784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,float16,0,0.5510826508204142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,float16,0,0.5419520139694214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,fp8,0,0.4614933331807454
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,fp8,0,0.46157864729563397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,fp8,0,0.48498133818308514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,float16,0,0.282586673895518
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,float16,0,0.25989866256713867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,fp8,0,0.2561653256416321
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,float16,0,0.26691200335820514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,fp8,0,0.2550346652666728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,float16,0,0.2550613284111023
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,float16,0,0.2571786642074585
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,fp8,0,0.2416213353474935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,fp8,0,0.20067733526229858
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,fp8,0,0.9062240123748779
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,float16,0,0.1495626668135325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,fp8,0,0.13365333278973898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,float16,0,0.14893866578737894
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,fp8,0,0.13607466220855713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,float16,0,0.049365331729253135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,float16,0,0.14239999651908875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,fp8,0,0.1395199994246165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,fp8,0,0.13955733180046082
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,float16,0,0.13969600200653076
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,fp8,0,0.13192533453305563
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,float16,0,0.4880586862564087
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,float16,0,0.08593066533406575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,fp8,0,0.07998933394749959
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,fp8,0,0.07769600053628285
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,float16,0,0.08346133430798848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,float16,0,0.08746133248011272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,fp8,0,0.24409067630767822
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,float16,0,0.08644800384839375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,fp8,0,0.07985599835713704
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,float16,0,0.07999466856320699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,fp8,0,0.07947200040022533
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,float16,0,0.05388799806435903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,fp8,0,0.047839999198913574
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,float16,0,0.05514133473237356
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,fp8,0,0.04755199948946635
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,fp8,0,0.2548746665318807
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,float16,0,0.052042668064435325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,float16,0,0.05011733373006185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,float16,0,0.050527999798456825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,fp8,0,0.04997866849104563
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,fp8,0,0.03032533327738444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,float16,0,0.031109333038330078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,fp8,0,0.028805332879225414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,float16,0,0.03234666585922241
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,fp8,0,0.02789866675933202
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,float16,0,0.029525332152843475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,fp8,0,0.029370665550231934
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,float16,0,0.029653333127498627
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,fp8,0,0.027461332579453785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,float16,0,0.15132799744606018
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,float16,0,1.0532693068186443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,fp8,0,0.9749386310577393
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,float16,0,1.0137866338094075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,fp8,0,1.0128586292266846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,fp8,0,0.04982399940490723
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,fp8,0,0.04996799925963084
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,float16,0,1.065877358118693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,float16,0,0.033200000723203026
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,float16,0,1.0213013490041096
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,fp8,0,0.976469357808431
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,float16,0,0.527450680732727
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,fp8,0,0.5013440052668253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,fp8,0,0.5104693174362183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,float16,0,0.5160106817881266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,float16,0,0.5763893524805704
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,fp8,0,0.4960586627324422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,float16,0,0.520362655321757
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,float16,0,0.5671306848526001
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,fp8,0,0.5154346625010172
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,fp8,0,0.07893866797288258
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,fp8,0,0.4963359832763672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,float16,0,0.27476799488067627
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,float16,0,0.2863893310228984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,fp8,0,0.25989333788553876
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,float16,0,0.2778453429539998
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,fp8,0,0.2562133272488912
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,fp8,0,0.267685333887736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,fp8,0,0.2669066588083903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,float16,0,0.27172799905141193
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,float16,0,0.2693973382314046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,fp8,0,0.26769065856933594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,fp8,0,0.9753066698710123
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,float16,0,0.14435733358065286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,float16,0,0.14466666181882223
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,float16,0,0.1476746698220571
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,fp8,0,0.14385599891344705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,fp8,0,0.14506133397420248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,fp8,0,0.13893333077430725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,fp8,0,0.13638933499654135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,float16,0,0.1461013356844584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,float16,0,0.08874133229255676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,fp8,0,0.07678933441638947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,float16,0,0.15714133779207864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,fp8,0,0.0815413345893224
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,fp8,0,0.08098666866620381
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,float16,0,0.08392533659934998
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,float16,0,0.08157866696516673
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,float16,0,0.08533866206804912
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,fp8,0,0.07628799974918365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,float16,0,0.05295999844868978
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,float16,0,0.055205335219701133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,float16,0,0.05194133520126343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,fp8,0,0.046442667643229164
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,fp8,0,0.04683733483155569
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,float16,0,0.052229334910710655
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,fp8,0,0.046181331078211464
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,float16,0,0.049098665515581764
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,fp8,0,0.04585599899291992
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,float16,0,0.03710933278004328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,fp8,0,0.03141866624355316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,fp8,0,0.03225066761175791
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,float16,0,0.03572800010442734
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,float16,0,0.03475733349720637
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,fp8,0,0.03161066770553589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,float16,0,0.03411199897527695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,fp8,0,0.033514666060606636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,float16,0,0.03341866781314214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,fp8,0,0.031514666974544525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,float16,0,0.023232000569502514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,fp8,0,0.021525333325068157
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,float16,0,0.02319466571013133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,fp8,0,0.02327999969323476
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,fp8,0,0.021456000705560047
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,fp8,0,0.13741333285967508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,float16,0,0.023269332945346832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,fp8,0,0.023178666830062866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,float16,0,0.08170666793982188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,fp8,0,0.02231466770172119
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,float16,0,0.023381332556406658
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,fp8,0,0.08132266501585643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,fp8,0,0.04910933474699656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,float16,0,0.7669599850972494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,fp8,0,0.7493493556976318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,float16,0,0.7691893577575684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,fp8,0,0.7488426367441813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,float16,0,0.7701173623402914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,fp8,0,0.7644586563110352
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,float16,0,0.023685333629449207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,float16,0,0.3996373414993286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,float16,0,0.3916533390680949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,float16,0,0.773354689280192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,fp8,0,0.3834666808446248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,fp8,0,0.38205866018931073
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,fp8,0,0.7490666707356771
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,float16,0,0.39191468556722003
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,fp8,0,0.381168007850647
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,fp8,0,0.38149333000183105
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,float16,0,0.40399467945098877
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,fp8,0,0.19765865802764893
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,float16,0,0.21516267458597818
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,float16,0,0.20919466018676758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,float16,0,0.3943519989649455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,fp8,0,0.2034613291422526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,float16,0,0.20480533440907797
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,float16,0,0.20470933119455972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,fp8,0,0.20341867208480835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,fp8,0,0.38155198097229004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,8,128,1,float16,float16,0,0.20567466815312704
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,8,128,1,float16,fp8,0,0.19908267259597778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,float16,0,0.1169706682364146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,fp8,0,0.10653866330782573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,fp8,0,0.10909333825111389
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,float16,0,0.11331733067830403
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,fp8,0,0.10693333546320598
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,float16,0,0.11150399843851726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,float16,0,0.11206400394439697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,fp8,0,0.11069867014884949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,fp8,0,0.10983999570210774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,float16,0,0.0710346649090449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,fp8,0,0.06079466640949249
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,float16,0,0.06786666810512543
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,fp8,0,0.061861331264177956
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,fp8,0,0.06230400005976359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,float16,0,0.06347733239332835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,float16,0,0.06473599870999654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,fp8,0,0.061850666999816895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,fp8,0,0.199072003364563
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,float16,0,0.06293333570162456
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,fp8,0,0.061146666606267296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,fp8,0,0.03552533437808355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,fp8,0,0.03534399966398875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,float16,0,0.038917332887649536
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,fp8,0,0.03721066564321518
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,float16,0,0.03745600084463755
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,fp8,0,0.03777066618204117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,fp8,0,0.035616000493367515
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,float16,0,0.03729599962631861
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,fp8,0,0.025242666403452556
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,float16,0,0.02718399961789449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,float16,0,0.026309333741664886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,fp8,0,0.025349333882331848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,float16,0,0.025605333348115284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,fp8,0,0.02534399926662445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,float16,0,0.025311999022960663
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,fp8,0,0.025424001117547352
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,float16,0,0.02555199960867564
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,fp8,0,0.025279998779296875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,float16,0,0.01714666684468587
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,fp8,0,0.016986666868130367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,float16,0,0.017050666113694508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,float16,0,0.01905599981546402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,fp8,0,0.01687466725707054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,float16,0,0.017173333714405697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,float16,0,0.019013332823912304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,fp8,0,0.016789333273967106
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,float16,0,0.1097813347975413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,float16,0,0.039818666875362396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,float16,0,0.017893332988023758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,float16,0,0.016949333250522614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,float16,0,0.017055999487638474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,float16,0,0.03728000074625015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,float16,0,0.017370666066805523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,fp8,0,0.3198773264884949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,float16,0,0.328272004922231
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,float16,0,0.32224533955256146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,fp8,0,0.016970666746298473
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,fp8,0,0.3198453386624654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,float16,0,0.3295680085817973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,fp8,0,0.01674666628241539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,fp8,0,0.31989334026972455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,float16,0,0.01714133347074191
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,float16,0,0.16684265931447348
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,float16,0,0.32497066259384155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,float16,0,0.16987200578053793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,float16,0,0.17083199818929037
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,fp8,0,0.32016533613204956
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,fp8,0,0.16811732451121011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,fp8,0,0.1658453345298767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,fp8,0,0.16459199786186218
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,fp8,0,0.16828266779581705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,float16,0,0.1713599960009257
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,float16,0,0.1739573280016581
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,float16,0,0.09119466940561931
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,fp8,0,0.09059199690818787
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,float16,0,0.09501333038012187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,fp8,0,0.08924800157546997
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,float16,0,0.09070932865142822
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,fp8,0,0.09075199564297994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,fp8,0,0.08992000420888265
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,float16,0,0.09103999535242717
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,fp8,0,0.08873599767684937
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,fp8,0,0.05180799961090088
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,float16,0,0.05566933254400889
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,fp8,0,0.05263466636339823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,fp8,0,0.016906666258970898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,fp8,0,0.05231466889381409
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,float16,0,0.05597866574923197
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,fp8,0,0.05189866820971171
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,fp8,0,0.05184000233809153
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,float16,0,0.05449600021044413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,float16,0,0.03143999973932902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,float16,0,0.03143999973932902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,fp8,0,0.030837332208951313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,float16,0,0.03136533250411352
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,fp8,0,0.02975466599067052
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,float16,0,0.029722665747006733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,fp8,0,0.1655306617418925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,fp8,0,0.02940266579389572
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,float16,0,0.03139200061559677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,fp8,0,0.02941333254178365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,float16,0,0.0958666702111562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,fp8,0,0.02250133454799652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,float16,0,0.02309333284695943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,fp8,0,0.02179733415444692
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,float16,0,0.05386666456858317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,float16,0,0.023376000424226124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,fp8,0,0.021189334491888683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,float16,0,0.023034666975339253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,float16,0,0.05472533404827118
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,fp8,0,0.021141332884629566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,float16,0,0.015061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,float16,0,0.015168000012636185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,float16,0,0.014943999548753103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,fp8,0,0.014874666929244995
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,float16,0,0.015040000279744467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,fp8,0,0.029440000653266907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,fp8,0,0.014752000570297241
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,float16,0,0.01509333277742068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,float16,0,0.014943999548753103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,fp8,0,0.014698666830857595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,fp8,0,0.014954666296641031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,float16,0,0.014736000448465347
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,float16,0,0.014853333433469137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,float16,0,0.023221333821614582
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,float16,0,0.015040000279744467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,fp8,0,0.014869333555301031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,float16,0,0.01509333277742068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,float16,0,0.015061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,float16,0,0.015002666662136713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,fp8,0,0.01360000049074491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,fp8,0,0.01522133375207583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,float16,0,0.014789332946141561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,float16,0,0.01469333345691363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,fp8,0,0.013408000270525614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,fp8,0,0.014730667074521383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,float16,0,0.015114666273196539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,float16,0,0.023183998962243397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,float16,0,0.20374933878580728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,fp8,0,0.19902400175730386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,fp8,0,0.19941333929697672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,float16,0,0.20338133970896402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,fp8,0,0.19929067293802896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,float16,0,0.10782933235168457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,fp8,0,0.19950934251149496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,fp8,0,0.10498133301734924
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,float16,0,0.11125333110491435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,fp8,0,0.10522133111953735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,float16,0,0.10725866754849751
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,float16,0,0.20348799228668213
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,float16,0,0.10751466949780782
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,fp8,0,0.10508267084757487
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,fp8,0,0.10371200243631999
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,fp8,0,0.021583999196688335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,float16,0,0.20337599515914917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,fp8,0,0.05797866483529409
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,float16,0,0.0610346645116806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,float16,0,0.05816000203291575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,fp8,0,0.1051093339920044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,fp8,0,0.05773866673310598
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,float16,0,0.05994666616121928
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,fp8,0,0.05805333455403646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,fp8,0,0.05807466804981232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,float16,0,0.06070933242638906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,float16,0,0.05973333120346069
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,fp8,0,0.05778133372465769
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,float16,0,0.035616000493367515
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,float16,0,0.0354720006386439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,fp8,0,0.033973333736260734
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,fp8,0,0.03366400053103765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,float16,0,0.03571200122435888
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,fp8,0,0.03350399931271871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,fp8,0,0.03358400116364161
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,float16,0,0.035429333647092186
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,fp8,0,0.033520000676314034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,float16,0,0.021168000996112823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,float16,0,0.021226666867733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,float16,0,0.021221332252025604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,float16,0,0.02111999938885371
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,fp8,0,0.020975999534130096
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,fp8,0,0.02080533280968666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,float16,0,0.015856000284353893
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,float16,0,0.01526933287580808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,float16,0,0.016458666572968166
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,float16,0,0.01624533285697301
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,fp8,0,0.015418666104475657
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,float16,0,0.10987200339635213
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,float16,0,0.015077333897352219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,fp8,0,0.015397333850463232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,fp8,0,0.01071999967098236
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,float16,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,float16,0,0.010826667149861654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,fp8,0,0.010725333044926325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,float16,0,0.010816000401973724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,float16,0,0.035173334181308746
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,float16,0,0.011002667248249054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,fp8,0,0.010693332801262537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,fp8,0,0.020992000897725422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,float16,0,0.010746666540702185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,fp8,0,0.01071999967098236
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,fp8,0,0.021359999974568684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,float16,0,0.010709332923094431
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,float16,0,0.010650667051474253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,fp8,0,0.010597333312034607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,fp8,0,0.010682666053374609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,fp8,0,0.012234666695197424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,float16,0,0.010863999525705973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,float16,0,0.021130666136741638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,fp8,0,0.010682666053374609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,float16,0,0.010703999549150467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,fp8,0,0.01062400018175443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,float16,0,0.010634666929642359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,fp8,0,0.015130666395028433
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,fp8,0,0.010735999792814255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,float16,0,0.16139733791351318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,fp8,0,0.1560640037059784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,float16,0,0.16014933586120605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,float16,0,0.010922666639089584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,fp8,0,0.15636266271273294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,float16,0,0.16049066185951233
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,float16,0,0.010698666175206503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,float16,0,0.010954666882753372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,float16,0,0.010714666297038397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,fp8,0,0.1563040018081665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,float16,0,0.08681600292523702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,fp8,0,0.15621333320935568
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,float16,0,0.16218133767445883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,float16,0,0.08672533432642619
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,fp8,0,0.0823520024617513
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,fp8,0,0.08264000217119853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,float16,0,0.08541867136955261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,float16,0,0.08573333422342937
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,float16,0,0.08797867099444072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,fp8,0,0.08260799944400787
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,float16,0,0.048565333088239036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,fp8,0,0.04563733438650767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,float16,0,0.04961066444714864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,fp8,0,0.045978665351867676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,fp8,0,0.046181331078211464
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,float16,0,0.04764799773693085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,fp8,0,0.04562133550643921
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,fp8,0,0.010885333021481832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,float16,0,0.047082667549451195
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,fp8,0,0.04560000201066335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,float16,0,0.029125332832336426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,fp8,0,0.027285332481066387
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,float16,0,0.02942399928967158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,fp8,0,0.027509334186712902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,float16,0,0.02938666691382726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,fp8,0,0.027327999472618103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,float16,0,0.027744000156720478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,fp8,0,0.027434666951497395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,float16,0,0.017488000293572743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,float16,0,0.017925333231687546
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,float16,0,0.017386666188637417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,fp8,0,0.08442133665084839
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,float16,0,0.0170666662355264
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,float16,0,0.017573333034912746
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,float16,0,0.04786133269468943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,float16,0,0.014096000542243322
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,float16,0,0.015098666151364645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,float16,0,0.013210666676362356
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,float16,0,0.013605333864688873
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,float16,0,0.01310933381319046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,float16,0,0.010687999427318573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,fp8,0,0.027317332724730175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,float16,0,0.029088000456492107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,fp8,0,0.010693332801262537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,float16,0,0.010677333921194077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,fp8,0,0.01062400018175443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,float16,0,0.010960000256697336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,fp8,0,0.010288000106811523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,float16,0,0.010821333775917688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,fp8,0,0.01062400018175443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,float16,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,float16,0,0.010672000547250112
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,float16,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,fp8,0,0.010005333150426546
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,float16,0,0.009162666896979014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,float16,0,0.010885333021481832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,fp8,0,0.008837333569924036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,fp8,0,0.08269333342711131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,float16,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,float16,0,0.009349333122372627
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,fp8,0,0.00879466657837232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,float16,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,fp8,0,0.009098666409651438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,float16,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,float16,0,0.01091733326514562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,float16,0,0.14272000392278036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,fp8,0,0.13591999808947244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,float16,0,0.1432319978872935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,fp8,0,0.13723199566205344
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,float16,0,0.1400159994761149
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,fp8,0,0.13622400164604187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,float16,0,0.14365333318710327
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,float16,0,0.07701333363850911
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,fp8,0,0.07268266876538594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,fp8,0,0.13740799824396768
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,fp8,0,0.07308266560236613
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,float16,0,0.07815999786059062
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,float16,0,0.07644266883532207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,float16,0,0.07737066845099132
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,fp8,0,0.07249600191911061
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,fp8,0,0.07204266885916392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,float16,0,0.043680002291997276
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,fp8,0,0.04015466570854187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,float16,0,0.04373333354791006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,float16,0,0.043103997906049095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,fp8,0,0.03991466760635376
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,fp8,0,0.041637333730856575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,float16,0,0.042453333735466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,fp8,0,0.03992533435424169
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,fp8,0,0.0406986673672994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,float16,0,0.041759997606277466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,float16,0,0.02698666602373123
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,float16,0,0.027141332626342773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,fp8,0,0.025114665428797405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,fp8,0,0.025029333929220837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,fp8,0,0.025221332907676697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,float16,0,0.02740799884001414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,float16,0,0.02731200059254964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,float16,0,0.026752000053723652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,fp8,0,0.025333332518736523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,float16,0,0.016810666769742966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,fp8,0,0.015237333873907724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,float16,0,0.07678933441638947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,float16,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,float16,0,0.017082666357358296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,float16,0,0.017157333592573803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,fp8,0,0.016415999581416447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,float16,0,0.01714666684468587
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,fp8,0,0.016063999384641647
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,float16,0,0.016997333616018295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,fp8,0,0.01670933390657107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,float16,0,0.012784000486135483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,fp8,0,0.07241599758466084
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,float16,0,0.012965332716703415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,float16,0,0.013013333082199097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,float16,0,0.012954667210578918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,float16,0,0.012778667112191519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,float16,0,0.009039999917149544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,float16,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,fp8,0,0.009749333063761393
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,float16,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,float16,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,fp8,0,0.009173333023985228
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,float16,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,float16,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,fp8,0,0.00877333308259646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,float16,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,float16,0,0.008821333448092142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,float16,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,float16,0,0.008645333349704742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,float16,0,0.008645333349704742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,fp8,0,0.016341333587964375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,float16,0,0.008736000085870424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,float16,0,0.13803199927012125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,fp8,0,0.12864533066749573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,float16,0,0.13828800121943155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,fp8,0,0.12729600071907043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,fp8,0,0.1277653376261393
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,fp8,0,0.02510400116443634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,float16,0,0.1406613290309906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,float16,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,fp8,0,0.12777066230773926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,float16,0,0.0748586654663086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,fp8,0,0.06823466718196869
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,float16,0,0.07474666833877563
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,fp8,0,0.06891199946403503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,float16,0,0.07611733178297679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,fp8,0,0.06832533578077953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,float16,0,0.07421866556008656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,fp8,0,0.0689333329598109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,fp8,0,0.07009066641330719
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,float16,0,0.04370133578777313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,float16,0,0.07443200051784515
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,float16,0,0.04188266893227895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,fp8,0,0.037647999823093414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,fp8,0,0.03957866628964742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,float16,0,0.041802664597829185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,fp8,0,0.0395359992980957
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,float16,0,0.04211199780305227
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,fp8,0,0.03782399992148081
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,float16,0,0.042378668983777366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,fp8,0,0.03932799895604452
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,float16,0,0.025306666890780132
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,fp8,0,0.023183998962243397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,float16,0,0.02532800038655599
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,fp8,0,0.023237332701683044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,float16,0,0.14089066783587137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,float16,0,0.02532800038655599
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,fp8,0,0.0232640008131663
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,float16,0,0.025439999997615814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,float16,0,0.025386666258176167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,float16,0,0.01693333312869072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,float16,0,0.016986666868130367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,float16,0,0.01674666628241539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,fp8,0,0.015295999745527903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,fp8,0,0.015344000111023584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,float16,0,0.012954667210578918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,fp8,0,0.014933332800865173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,float16,0,0.012618667135636011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,float16,0,0.012879999975363413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,fp8,0,0.012613333761692047
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,float16,0,0.012773333738247553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,fp8,0,0.012682666381200155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,fp8,0,0.012773333738247553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,float16,0,0.01022933361430963
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,float16,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,float16,0,0.009061333412925402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,float16,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,float16,0,0.010709332923094431
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,float16,0,0.00879466657837232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,fp8,0,0.023930666347344715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,float16,0,0.008752000207702318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,float16,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,fp8,0,0.02455466737349828
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,float16,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,fp8,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,float16,0,0.008853333070874214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,float16,0,0.008752000207702318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,fp8,0,0.008762666955590248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,float16,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,float16,0,0.01714133347074191
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,fp8,0,0.009114666531483332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,float16,0,0.009370666618148485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,float16,0,0.012975999464591345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,1,128,1,float16,float16,0,0.012655999511480331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,1,128,1,float16,fp8,0,0.010869332899649939
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,0,0.014352000008026758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,0,0.017157333592573803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,fp8,0,0.008789333204428354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,0,0.027274665733178455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,0,0.023157333334287006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,float16,0,0.03998400022586187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,1,128,1,float16,float16,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,fp8,0,0.031162666777769726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,fp8,0,0.008661333471536636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,0,0.011359999577204386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,0,0.016879999389251072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,1,128,1,float16,float16,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,fp8,0,0.019253333409627277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,1,128,1,float16,fp8,0,0.008661333471536636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,0,0.007237333183487256
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,0,0.011141333729028702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,0,0.010618666807810465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,float16,0,0.016927999754746754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,float16,0,0.015013333410024643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,1,128,1,float16,float16,0,0.00860799973209699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,1,128,1,float16,fp8,0,0.008767999708652496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,0,0.008613333106040955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,0,0.0069226666043202085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,0,0.009050666665037474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,0,0.010965333630641302
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,1,128,1,float16,float16,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,1,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,float16,0,0.010949333508809408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,0,0.007418666655818622
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,0,0.014661333213249842
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,0,0.006762666627764702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,0,0.006805333619316419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,float16,0,0.022837333381175995
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,0,0.007653333246707916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,1,128,1,float16,float16,0,0.008586666857202848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,0,0.007002666592597961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,0,0.00690133310854435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,float16,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,0,0.006735999758044879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,0,0.006890666360656421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,1,128,1,float16,float16,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,1,128,1,float16,fp8,0,0.008613333106040955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,float16,0,0.007802666475375493
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,fp8,0,0.0069973332186539965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,0,0.006645333642760913
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,0,0.006784000123540561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,0,0.006698666761318843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,0,0.006768000001708667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,1,128,1,float16,float16,0,0.008762666955590248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,1,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,fp8,0,0.006810666372378667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,0,0.008053333188096682
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,float16,0,0.006751999879876773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,0,0.007082666580875714
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,0,0.006810666372378667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,0,0.006821333120266597
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,0,0.007055999711155891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,0,0.006682666639486949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,1,128,1,float16,float16,0,0.009194666519761086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,1,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,float16,0,0.00702400008837382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,fp8,0,0.0068853336075941724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,0,0.006741333131988843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,0,0.006682666639486949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,0,0.00696000022192796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,1,128,1,float16,fp8,0,0.008650666723648706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,fp8,0,5.299423853556315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,float16,0,5.48307736714681
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,float16,0,6.9138132731119795
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,0,0.006906666482488315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,0,0.00697066696981589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,fp8,0,5.04800542195638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,0,0.006714666883150737
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,float16,0,6.625125249226888
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,float16,0,2.811744054158529
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,fp8,0,2.583125273386637
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,fp8,0,2.582815965016683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,float16,0,3.5170825322469077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,fp8,0,5.290207862854004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,float16,0,5.486186981201172
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,fp8,0,5.206399917602539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,float16,0,3.5595572789510093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,fp8,0,2.580047925313314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,float16,0,2.8071521123250327
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,fp8,0,2.6694294611612954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,fp8,0,2.7429494857788086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,float16,0,1.470479965209961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,float16,0,3.0621814727783203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,fp8,0,1.3543306986490886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,float16,0,1.8505813280741374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,fp8,0,1.4349973996480305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,fp8,0,1.4362932840983074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,float16,0,1.7741066614786785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,fp8,0,1.4320319493611653
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,float16,0,1.7833120028177898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,float16,0,0.8014079729715983
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,float16,0,1.5147840181986492
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,float16,0,0.9563732941945394
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,fp8,0,0.734666665395101
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,fp8,0,1.6856266657511394
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,fp8,0,0.7782133420308431
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,float16,0,0.9416960080464681
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,fp8,0,0.730837345123291
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,fp8,0,0.8390826384226481
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,fp8,0,0.8182026545206705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,float16,0,0.8008586565653483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,float16,0,3.1643199920654297
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,fp8,0,3.125093460083008
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,float16,0,3.164496103922526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,fp8,0,3.179109255472819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,fp8,0,3.1142613093058267
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,float16,0,4.1526187260945635
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,float16,0,1.6358720461527507
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,fp8,0,1.50984525680542
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,float16,0,1.7467039426167805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,float16,0,3.1674931844075522
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,fp8,0,2.109327952067057
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,float16,0,0.8246133327484131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,float16,0,1.6388266881306965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,fp8,0,1.500805377960205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,float16,0,1.639285405476888
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,fp8,0,1.5042667388916016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,float16,0,1.6334346135457356
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,float16,0,0.8686506748199463
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,fp8,0,0.8490346272786459
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,float16,0,0.9608213106791178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,fp8,0,1.8518880208333333
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,fp8,0,0.7968959808349609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,fp8,0,0.8485813140869141
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,fp8,0,0.8484319845835367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,float16,0,0.9294986724853516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,fp8,0,0.8109813531239828
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,float16,0,0.8680319786071777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,float16,0,0.4854613145192464
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,float16,0,0.513754685719808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,fp8,0,0.4444426695505778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,float16,0,0.5664800008138021
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,fp8,0,0.4736800193786621
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,float16,0,0.515999992688497
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,fp8,0,0.44146132469177246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,float16,0,0.48443734645843506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,fp8,0,3.1134719848632812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,float16,0,2.4063572883605957
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,fp8,0,2.2091520627339682
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,float16,0,2.390106678009033
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,fp8,0,2.0647093454996743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,float16,0,0.9237013657887777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,float16,0,2.253866672515869
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,fp8,0,2.0977813402811685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,float16,0,1.1671946843465169
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,float16,0,2.246016025543213
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,fp8,0,2.0662666956583657
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,float16,0,1.4223732948303223
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,fp8,0,1.1191413402557373
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,fp8,0,1.1463840007781982
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,fp8,0,0.4411840041478475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,float16,0,1.2434666951497395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,fp8,0,1.1435680389404297
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,float16,0,1.165013313293457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,fp8,0,1.1887413660685222
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,fp8,0,0.45045332113901776
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,float16,0,0.6275146802266439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,fp8,0,1.0738826592763264
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,float16,0,1.4128959973653157
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,fp8,0,0.6129173437754313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,fp8,0,0.613701343536377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,fp8,0,0.612885316212972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,fp8,0,0.5864160060882568
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,float16,0,0.6689120133717855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,fp8,0,0.5743093490600586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,float16,0,0.3586719830830892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,fp8,0,0.33957866827646893
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,fp8,0,0.3244746724764506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,float16,0,0.35285866260528564
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,fp8,0,0.3264159957567851
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,fp8,0,0.3248533407847087
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,float16,0,0.38036266962687176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,float16,0,0.3572213252385457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,fp8,0,0.32445865869522095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,fp8,0,2.786645253499349
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,float16,0,3.1296428044637046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,float16,0,2.8896001180013022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,fp8,0,2.663322607676188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,float16,0,0.6667413711547852
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,float16,0,0.7411359945933024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,float16,0,0.7325653235117594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,float16,0,0.40064001083374023
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,float16,0,1.4861920674641926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,fp8,0,2.937450726826986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,float16,0,3.4107678731282554
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,float16,0,2.893146514892578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,fp8,0,2.6662774085998535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,fp8,0,1.4557706514994304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,float16,0,1.8479946454366047
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,fp8,0,1.4613280296325684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,float16,0,1.825061321258545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,float16,0,1.8249972661336262
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,float16,0,1.6613066991170247
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,float16,0,0.779584010442098
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,fp8,0,1.458624045054118
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,float16,0,0.9315253098805746
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,fp8,0,0.7149973710378011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,fp8,0,0.7646826903025309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,float16,0,0.8288906415303549
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,fp8,0,0.7135626475016276
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,fp8,0,0.7636586825052897
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,float16,0,0.9315733114878336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,float16,0,0.9367039998372396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,fp8,0,0.7635306517283121
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,float16,0,0.42686935265858966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,float16,0,0.49008532365163165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,fp8,0,0.41540801525115967
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,fp8,0,0.38982399304707843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,float16,0,0.4753173192342122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,fp8,0,0.4172106583913167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,fp8,0,0.4167199929555257
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,float16,0,0.4243946472803752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,float16,0,0.4941440025965373
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,fp8,0,0.4168320099512736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,fp8,0,0.22635199626286825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,float16,0,0.2521599928538005
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,fp8,0,0.22590933243433634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,float16,0,0.2762506604194641
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,fp8,0,0.24373332659403482
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,float16,0,0.24658133586247763
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,fp8,0,0.24343999226888022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,float16,0,0.24990399678548178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,fp8,0,0.24311467011769614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,fp8,0,1.787050724029541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,fp8,0,1.4606506029764812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,fp8,0,1.5627039273579915
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,float16,0,2.000101407368978
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,fp8,0,1.5627199808756511
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,float16,0,2.0196587244669595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,fp8,0,1.6763572692871094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,float16,0,1.6994293530782063
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,float16,0,0.8823893070220947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,float16,0,1.0626719792683919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,fp8,0,0.8631306489308676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,fp8,0,1.565392017364502
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,float16,0,1.0721546808878581
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,fp8,0,0.8635466893513998
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,float16,0,1.044490655263265
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,fp8,0,0.8628959655761719
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,float16,0,0.27269333600997925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,float16,0,0.8777706623077393
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,float16,0,0.4999680121739705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,fp8,0,0.863696018854777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,float16,0,0.4690399964650472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,fp8,0,0.4586293299992879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,float16,0,0.4984373251597087
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,fp8,0,0.42815999190012616
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,float16,0,0.5235786835352579
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,fp8,0,0.45958399772644043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,float16,0,0.4668000141779582
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,fp8,0,0.4288853406906128
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,float16,0,0.27562665939331055
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,float16,0,0.2811253269513448
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,fp8,0,0.24126400550206503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,fp8,0,0.25727466742197674
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,float16,0,0.2617119948069255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,fp8,0,0.2542720039685567
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,float16,0,0.2836479942003886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,fp8,0,0.26103466749191284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,float16,0,2.1020639737447104
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,float16,0,0.26257065931955975
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,fp8,0,0.2402986685434977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,float16,0,0.17659733692804971
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,float16,0,0.16355733076731363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,float16,0,0.17375467220942178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,fp8,0,0.14338133732477823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,fp8,0,0.15496533115704855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,float16,0,0.15683199961980185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,fp8,0,0.8094773292541504
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,fp8,0,0.14409066239992777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,float16,0,0.1560373306274414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,float16,0,1.591968059539795
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,float16,0,1.5924693743387859
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,fp8,0,1.5717760721842449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,float16,0,1.5955732663472493
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,fp8,0,0.437605341275533
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,float16,0,0.8304959932963053
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,float16,0,1.602128028869629
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,fp8,0,1.4757332801818848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,float16,0,0.9907253583272299
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,fp8,0,0.8060213724772135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,float16,0,0.9987626870473226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,fp8,0,0.1537813345591227
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,fp8,0,0.8060373465220133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,float16,0,0.8175573348999023
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,fp8,0,0.8056960105895996
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,float16,0,0.8746453126271566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,fp8,0,1.4715627034505208
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,float16,0,0.43632535139719647
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,fp8,0,0.8085493246714274
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,float16,0,0.5164053440093994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,float16,0,0.46031467119852704
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,fp8,0,0.4272373517354329
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,fp8,0,0.4236106475194295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,float16,0,0.4582986831665039
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,fp8,0,0.760480006535848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,float16,0,0.45979734261830646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,fp8,0,0.4229280153910319
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,float16,0,0.24315200249354044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,fp8,0,0.15503467122713724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,fp8,0,0.234607994556427
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,float16,0,0.25307732820510864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,fp8,0,0.23441066344579062
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,float16,0,0.26929599046707153
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,fp8,0,0.23425600926081339
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,float16,0,0.23700799544652304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,fp8,0,0.21890133619308472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,float16,0,0.2690986593564351
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,fp8,0,0.23230934143066406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,float16,0,0.14739200472831726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,float16,0,0.1536853313446045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,fp8,0,0.12664000193277994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,fp8,0,0.13342932860056558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,float16,0,0.15373333295186362
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,fp8,0,0.1366986632347107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,float16,0,0.13926933209101358
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,fp8,0,0.1358560025691986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,fp8,0,0.13595733046531677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,float16,0,0.14019200205802917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,fp8,0,0.08237333099047343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,float16,0,0.10020266969998677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,fp8,0,0.08229866623878479
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,float16,0,0.09238400061925252
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,fp8,0,0.08927466471989949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,float16,0,0.09914132952690125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,fp8,0,0.08870399991671245
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,fp8,0,0.3968586524327596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,float16,0,0.09084266424179077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,fp8,0,0.08967999617258708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,fp8,0,0.9333653450012207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,fp8,0,0.8875839710235596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,float16,0,0.9606986840566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,fp8,0,0.88754669825236
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,float16,0,0.9648426373799642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,fp8,0,0.9472053050994873
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,float16,0,0.504309336344401
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,fp8,0,0.4917173385620117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,fp8,0,0.4216906627019246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,float16,0,0.09109333157539368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,fp8,0,0.4893600145975749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,float16,0,0.5326559940973917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,fp8,0,1.4732747077941895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,fp8,0,0.4885173241297404
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,float16,0,0.4979519844055176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,fp8,0,0.48870400587717694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,float16,0,0.49885332584381104
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,fp8,0,0.4620000123977661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,fp8,0,0.2608106732368469
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,float16,0,0.27028799057006836
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,float16,0,0.9606133302052816
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,float16,0,1.1645653247833252
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,fp8,0,0.2624160051345825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,float16,0,0.2714719971021016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,fp8,0,0.26243199904759723
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,fp8,0,0.2614453236262004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,fp8,0,0.26216532786687213
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,float16,0,0.3073280056317647
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,float16,0,0.16797866423924765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,float16,0,0.1595306694507599
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,fp8,0,0.1390506625175476
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,fp8,0,0.1434346636136373
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,fp8,0,0.14867732922236124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,float16,0,0.15007999539375305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,fp8,0,0.14725333452224731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,float16,0,0.15148799618085226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,fp8,0,0.13759467005729675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,float16,0,0.09894399841626485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,float16,0,0.09964799880981445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,float16,0,0.600762685139974
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,fp8,0,0.08992532889048259
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,float16,0,0.09071999788284302
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,fp8,0,0.0890933374563853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,fp8,0,0.08962133526802063
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,float16,0,0.10115733742713928
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,float16,0,0.09014399846394856
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,fp8,0,0.0888213316599528
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,float16,0,0.06037333110968272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,float16,0,0.06062399844328562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,fp8,0,0.06006933252016703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,fp8,0,0.05603733162085215
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,float16,0,0.06047999858856201
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,fp8,0,0.060319999853769936
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,float16,0,0.06677333513895671
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,float16,0,0.06001066664854685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,fp8,0,0.055888002117474876
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,float16,0,0.2825973431269328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,float16,0,0.30541332562764484
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,float16,0,0.9410400390625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,fp8,0,0.9249173005421957
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,float16,0,0.9955999851226807
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,fp8,0,0.8748853206634521
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,float16,0,0.9447413285573324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,fp8,0,0.08312533299128215
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,fp8,0,0.8756053447723389
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,float16,0,0.9501226743062338
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,float16,0,0.4945760170618693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,float16,0,0.48234665393829346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,fp8,0,0.4551680088043213
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,fp8,0,0.8791946570078532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,fp8,0,0.056090667843818665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,fp8,0,0.4747733275095622
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,float16,0,0.4870293140411377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,fp8,0,0.44783465067545575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,float16,0,0.583402673403422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,fp8,0,0.44892799854278564
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,float16,0,0.4876319964726766
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,fp8,0,0.47702399889628094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,float16,0,0.26212799549102783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,float16,0,0.2952853242556254
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,fp8,0,0.23964265982309976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,float16,0,0.16725865999857584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,float16,0,0.25520533323287964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,float16,0,0.2922666668891907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,fp8,0,0.2516000072161357
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,float16,0,0.2584746678670247
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,float16,0,0.15595199664433798
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,fp8,0,0.13847999771436056
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,float16,0,0.1581706702709198
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,fp8,0,0.13236799836158752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,float16,0,0.15713066856066385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,float16,0,0.15377599994341531
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,fp8,0,0.14242666959762573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,float16,0,0.14313600460688272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,fp8,0,0.1397119959195455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,fp8,0,0.08193600177764893
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,float16,0,0.09331199526786804
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,float16,0,0.0835093359152476
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,fp8,0,0.07655466596285503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,float16,0,0.09258133172988892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,fp8,0,0.08204799890518188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,float16,0,0.08437333504358928
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,fp8,0,0.08071466783682506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,float16,0,0.08462933699289958
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,float16,0,0.05460800230503082
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,float16,0,0.05365333457787832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,fp8,0,0.04979733129342397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,float16,0,0.053946668903032936
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,fp8,0,0.051962668697039284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,fp8,0,0.2525493303934733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,fp8,0,0.051813334226608276
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,fp8,0,0.23644266525904337
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,float16,0,0.05386666456858317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,fp8,0,0.052005335688591
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,fp8,0,0.23667732874552408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,float16,0,0.03825066735347112
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,fp8,0,0.03148266673088074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,fp8,0,0.03366400053103765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,float16,0,0.037615999579429626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,fp8,0,0.13828266660372415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,fp8,0,0.03342399994532267
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,fp8,0,0.03342933456103007
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,float16,0,0.033728001018365227
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,float16,0,0.033861334125200905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,fp8,0,0.033301333586374916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,float16,0,0.6236106554667155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,fp8,0,0.0751146674156189
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,float16,0,0.6935733159383138
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,fp8,0,0.569599986076355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,float16,0,0.5932480096817017
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,fp8,0,0.049914668003718056
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,float16,0,0.05373333394527435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,fp8,0,0.2890613277753194
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,float16,0,0.31445332368214923
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,float16,0,0.30565333366394043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,float16,0,0.5965919891993204
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,fp8,0,0.5786453485488892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,fp8,0,0.28436267375946045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,float16,0,0.35290666421254474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,float16,0,0.033717334270477295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,fp8,0,0.299018661181132
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,float16,0,0.35210132598876953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,float16,0,0.18279467026392618
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,fp8,0,0.30034132798512775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,float16,0,0.17454934120178223
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,fp8,0,0.15477866927782694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,float16,0,0.31013866265614826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,fp8,0,0.16190399726231894
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,float16,0,0.16743467251459757
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,fp8,0,0.5478560129801432
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,fp8,0,0.15243200461069742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,float16,0,0.1672053337097168
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,fp8,0,0.15397333105405173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,fp8,0,0.08832533160845439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,float16,0,0.10342400272687276
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,float16,0,0.09650666515032451
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,fp8,0,0.5494613250096639
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,float16,0,0.09453866879145305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,fp8,0,0.09149332841237386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,float16,0,0.09495466947555542
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,float16,0,0.09539199868837993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,fp8,0,0.08514133095741272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,float16,0,0.06353599826494853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,fp8,0,0.051914667089780174
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,fp8,0,0.28405867020289105
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,float16,0,0.06010133524735769
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,float16,0,0.06164266665776571
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,fp8,0,0.05514666438102722
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,float16,0,0.061610668897628784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,fp8,0,0.05568000177542368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,float16,0,0.062047998110453285
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,fp8,0,0.1625599960486094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,fp8,0,0.05197866757710775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,float16,0,0.04040000090996424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,float16,0,0.03812800099452337
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,fp8,0,0.03737599899371465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,fp8,0,0.03745600084463755
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,fp8,0,0.09156266848246257
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,float16,0,0.037658666570981346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,float16,0,0.037632000943024956
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,fp8,0,0.037290667494138084
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,float16,0,0.037445334096749626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,fp8,0,0.03472000112136205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,fp8,0,0.08528533577919006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,float16,0,0.029285334050655365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,fp8,0,0.025258667767047882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,float16,0,0.027429332335789997
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,float16,0,0.02959466725587845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,fp8,0,0.02518933266401291
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,float16,0,0.029440000653266907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,fp8,0,0.027210667729377747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,float16,0,0.027061333258946735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,fp8,0,0.05533333122730255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,float16,0,0.6430559953053793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,fp8,0,0.5754133462905884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,float16,0,0.17677332957585654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,float16,0,0.6172746817270914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,fp8,0,0.037530665596326195
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,fp8,0,0.5760960181554159
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,float16,0,0.6479359865188599
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,fp8,0,0.5771306753158569
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,fp8,0,0.025920001169045765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,float16,0,0.3252799908320109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,fp8,0,0.5804106791814169
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,float16,0,0.35711467266082764
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,fp8,0,0.02737066646416982
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,float16,0,0.3183893362681071
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,fp8,0,0.30929599205652875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,float16,0,0.3334346612294515
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,fp8,0,0.2957119941711426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,fp8,0,0.3090239961942037
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,float16,0,0.36875200271606445
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,fp8,0,0.2991679906845093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,float16,0,0.16827734311421713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,fp8,0,0.16426133116086325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,float16,0,0.172650674978892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,fp8,0,0.15662399927775064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,float16,0,0.16869866847991943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,float16,0,0.18628267447153726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,fp8,0,0.1660426656405131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,fp8,0,0.15811733404795328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,float16,0,0.09629333019256592
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,float16,0,0.6200106541315714
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,fp8,0,0.08675733208656311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,float16,0,0.09601599971453349
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,float16,0,0.09861866633097331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,float16,0,0.0960053304831187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,fp8,0,0.08853866656621297
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,float16,0,0.09513599673906963
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,fp8,0,0.09151466687520345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,float16,0,0.05773866673310598
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,float16,0,0.06003733476003011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,fp8,0,0.30080000559488934
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,fp8,0,0.050714666644732155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,fp8,0,0.0525439977645874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,float16,0,0.05993066728115082
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,float16,0,0.05769599974155426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,fp8,0,0.053690666953722634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,fp8,0,0.05193066596984863
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,float16,0,0.05606933434804281
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,fp8,0,0.052058666944503784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,float16,0,0.03929600119590759
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,float16,0,0.035418666899204254
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,float16,0,0.03786666691303253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,fp8,0,0.1556000014146169
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,fp8,0,0.03490666548411051
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,float16,0,0.03526933242877325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,fp8,0,0.03346666693687439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,float16,0,0.03558400024970373
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,fp8,0,0.03278400003910065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,fp8,0,0.08557333548863728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,float16,0,0.02510400116443634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,float16,0,0.023344000180562336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,fp8,0,0.02110933264096578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,float16,0,0.025114665428797405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,fp8,0,0.021274665991465252
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,float16,0,0.023306667804718018
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,fp8,0,0.021386665602525074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,fp8,0,0.0918933351834615
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,float16,0,0.023285334308942158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,fp8,0,0.021802666286627453
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,float16,0,0.023200000325838726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,fp8,0,0.020853333175182343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,fp8,0,0.021162666380405426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,float16,0,0.02123733361562093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,fp8,0,0.021104000508785248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,float16,0,0.02120000123977661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,fp8,0,0.021194666624069214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,float16,0,0.021295999487241108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,fp8,0,0.03401600072781245
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,float16,0,0.17678932348887125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,fp8,0,0.4324479897816976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,float16,0,0.5075306495030721
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,float16,0,0.021477334201335907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,fp8,0,0.43718934059143066
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,float16,0,0.45904000600179035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,fp8,0,0.021183999876181286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,fp8,0,0.03469333300987879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,fp8,0,0.4249333143234253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,float16,0,0.24979732433954874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,fp8,0,0.21850667397181192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,float16,0,0.4988746643066406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,float16,0,0.2407253384590149
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,float16,0,0.45628265539805096
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,fp8,0,0.22592532634735107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,fp8,0,0.42667198181152344
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,float16,0,0.24131200710932413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,float16,0,0.2365973393122355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,fp8,0,0.2205866575241089
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,float16,0,0.13143466909726462
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,fp8,0,0.12130133310953777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,float16,0,0.23712533712387085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,fp8,0,0.22594666481018066
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,fp8,0,0.11948266625404358
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,float16,0,0.12706666191418967
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,fp8,0,0.22506133715311685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,float16,0,0.13274133205413818
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,fp8,0,0.12054933110872905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,fp8,0,0.12123733758926392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,float16,0,0.07220800220966339
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,fp8,0,0.11735999584197998
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,fp8,0,0.06825066606203715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,float16,0,0.07500266532103221
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,fp8,0,0.06507200002670288
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,fp8,0,0.06400533517201741
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,float16,0,0.0757013310988744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,float16,0,0.07050133248170216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,fp8,0,0.06599999964237213
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,float16,0,0.07196266452471416
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,fp8,0,0.06423466900984447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,float16,0,0.04348800083001455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,fp8,0,0.03773866593837738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,fp8,0,0.03923733284076055
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,float16,0,0.1349066694577535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,float16,0,0.04368533194065094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,fp8,0,0.03721066564321518
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,fp8,0,0.039306665460268654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,fp8,0,0.037674665451049805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,float16,0,0.04144000013669332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,float16,0,0.027141332626342773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,fp8,0,0.025221332907676697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,fp8,0,0.023290666441122692
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,float16,0,0.026554666459560394
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,float16,0,0.02743999908367793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,float16,0,0.02759466568628947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,float16,0,0.025237334271272022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,float16,0,0.017290666699409485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,float16,0,0.016949333250522614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,fp8,0,0.01492799942692121
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,float16,0,0.017173333714405697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,float16,0,0.017173333714405697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,float16,0,0.01720000058412552
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,fp8,0,0.014959999670584997
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,float16,0,0.12849066654841104
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,float16,0,0.016970666746298473
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,float16,0,0.042277331153551735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,float16,0,0.015370666980743408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,float16,0,0.01676799977819125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,fp8,0,0.015103999525308609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,fp8,0,0.015002666662136713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,float16,0,0.015344000111023584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,float16,0,0.015333333363135656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,float16,0,0.016016000260909397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,float16,0,0.015013333410024643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,fp8,0,0.02313599983851115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,float16,0,0.015301333119471868
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,fp8,0,0.025008000433444977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,fp8,0,0.025077333052953083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,float16,0,0.014981333166360855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,fp8,0,0.014912000546852747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,float16,0,0.1869973341623942
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,fp8,0,0.1755680044492086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,float16,0,0.043738668163617454
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,float16,0,0.18703999121983847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,float16,0,0.014997333288192749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,float16,0,0.19528534015019736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,float16,0,0.18742400407791138
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,float16,0,0.12948266665140787
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,fp8,0,0.1782133380572001
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,fp8,0,0.17478932936986288
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,fp8,0,0.09507200121879578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,float16,0,0.10288533568382263
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,fp8,0,0.17869333426157633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,float16,0,0.1037493348121643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,fp8,0,0.09263466795285542
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,fp8,0,0.09325333436330159
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,fp8,0,0.09289600451787312
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,float16,0,0.07315200070540111
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,float16,0,0.10037866234779358
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,float16,0,0.10379733641942342
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,fp8,0,0.09320533275604248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,fp8,0,0.0516480008761088
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,float16,0,0.054229333996772766
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,fp8,0,0.049829334020614624
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,fp8,0,0.051829333106676735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,float16,0,0.05563200016816457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,fp8,0,0.0516480008761088
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,float16,0,0.05641066531340281
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,float16,0,0.058090666929880776
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,fp8,0,0.015173333386580149
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,fp8,0,0.05006400247414907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,float16,0,0.033439998825391136
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,float16,0,0.035386666655540466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,fp8,0,0.031104000906149547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,fp8,0,0.031445334355036415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,fp8,0,0.03124266614516576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,float16,0,0.0351946676770846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,float16,0,0.034527999659379326
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,fp8,0,0.0312266672650973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,float16,0,0.021344001094500225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,float16,0,0.020842666427294414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,fp8,0,0.019306667149066925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,fp8,0,0.019061333189407986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,fp8,0,0.019088000059127808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,float16,0,0.020714666694402695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,float16,0,0.020970667401949566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,fp8,0,0.01921066641807556
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,fp8,0,0.019050666441520054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,float16,0,0.013066666821638743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,float16,0,0.013077333569526672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,float16,0,0.01310933381319046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,float16,0,0.014949332922697067
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,float16,0,0.012906666845083237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,fp8,0,0.012666666259368261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,float16,0,0.01302933320403099
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,float16,0,0.012917333592971167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,fp8,0,0.015072000523408255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,float16,0,0.013013333082199097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,fp8,0,0.03050133337577184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,float16,0,0.012693333129088083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,float16,0,0.012709333250919977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,float16,0,0.012698666503032049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,float16,0,0.020202666521072388
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,float16,0,0.012768000364303589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,float16,0,0.01505600040157636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,float16,0,0.012837332983811697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,fp8,0,0.012554666648308435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,float16,0,0.012826666235923767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,float16,0,0.012608000387748083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,fp8,0,0.012752000242471695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,fp8,0,0.012586666891972223
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,float16,0,0.012815999488035837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,float16,0,0.012885333349307379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,float16,0,0.03455466777086258
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,float16,0,0.12572800119717917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,float16,0,0.01303999995191892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,fp8,0,0.11746133367220561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,float16,0,0.12707733114560446
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,fp8,0,0.11725866794586182
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,fp8,0,0.11720533172289531
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,float16,0,0.07393600046634674
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,float16,0,0.12654399871826172
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,fp8,0,0.0629066675901413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,float16,0,0.013056000073750814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,fp8,0,0.11733866731325786
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,float16,0,0.12337066729863484
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,float16,0,0.0680266668399175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,float16,0,0.06632533172766368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,fp8,0,0.06346133351325989
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,fp8,0,0.062208001812299095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,float16,0,0.06771733363469441
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,fp8,0,0.06215466558933258
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,fp8,0,0.06215466558933258
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,float16,0,0.041637333730856575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,fp8,0,0.033520000676314034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,fp8,0,0.033600000043710075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,float16,0,0.03756800045569738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,fp8,0,0.03365333378314972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,float16,0,0.03763733307520548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,float16,0,0.03826133410135905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,fp8,0,0.035349334279696144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,float16,0,0.037178667883078255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,fp8,0,0.03321066747109095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,fp8,0,0.021615999440352123
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,float16,0,0.023237332701683044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,float16,0,0.023647998770078022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,fp8,0,0.02231466770172119
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,float16,0,0.022954667607943218
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,fp8,0,0.022511998812357586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,float16,0,0.023290666441122692
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,fp8,0,0.021301334102948506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,float16,0,0.023381332556406658
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,fp8,0,0.021962667504946392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,float16,0,0.015050667027632395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,float16,0,0.015013333410024643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,fp8,0,0.014954666296641031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,fp8,0,0.014896000425020853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,float16,0,0.015114666273196539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,float16,0,0.014815999815861383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,float16,0,0.01516266663869222
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,fp8,0,0.014901333798964819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,float16,0,0.010879999647537867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,fp8,0,0.010762666662534079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,float16,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,float16,0,0.011029332876205444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,fp8,0,0.010794666906197866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,float16,0,0.010901333143313726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,fp8,0,0.010847999403874079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,float16,0,0.06764799853165944
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,float16,0,0.010970667004585266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,fp8,0,0.010634666929642359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,float16,0,0.010575999816258749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,float16,0,0.010869332899649939
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,float16,0,0.011034666250149408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,float16,0,0.010586666564146677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,fp8,0,0.010064000263810158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,float16,0,0.010954666882753372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,fp8,0,0.009082666908701261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,float16,0,0.011007999380429586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,fp8,0,0.00914666677514712
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,float16,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,fp8,0,0.010304000228643417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,float16,0,0.01099733387430509
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,float16,0,0.010661333799362183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,fp8,0,0.009904000287254652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,float16,0,0.010693332801262537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,fp8,0,0.009258666386206945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,fp8,0,0.014639999717473984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,fp8,0,0.010698666175206503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,float16,0,0.10802132884661357
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,fp8,0,0.09969066580136617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,float16,0,0.010773333410422007
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,float16,0,0.010581333190202713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,float16,0,0.10729066530863444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,fp8,0,0.0990826686223348
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,float16,0,0.10617599884668986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,fp8,0,0.09915199875831604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,float16,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,float16,0,0.10689600308736165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,float16,0,0.06001600126425425
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,fp8,0,0.05366399884223938
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,fp8,0,0.09975999593734741
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,float16,0,0.05622933308283488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,fp8,0,0.053898667295773826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,fp8,0,0.05389333268006643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,float16,0,0.05849599838256836
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,float16,0,0.060266668597857155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,fp8,0,0.054042667150497437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,float16,0,0.05755733450253805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,fp8,0,0.053642665346463524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,float16,0,0.03346133232116699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,float16,0,0.033520000676314034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,fp8,0,0.02940266579389572
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,fp8,0,0.03033600002527237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,float16,0,0.03281066566705704
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,fp8,0,0.029301332930723827
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,float16,0,0.03358400116364161
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,fp8,0,0.030389333764712017
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,fp8,0,0.029525332152843475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,float16,0,0.03164266546567281
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,float16,0,0.021002667645613354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,float16,0,0.021205333371957142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,fp8,0,0.019093333433071773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,fp8,0,0.019178666174411774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,fp8,0,0.019109333554903667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,float16,0,0.021903999149799347
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,float16,0,0.010911999891201654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,float16,0,0.013327999661366144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,float16,0,0.01309866706530253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,float16,0,0.01461333284775416
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,float16,0,0.013381333400805792
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,float16,0,0.013093333691358566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,float16,0,0.010698666175206503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,float16,0,0.01102399950226148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,fp8,0,0.010586666564146677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,fp8,0,0.00927466650803884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,float16,0,0.010703999549150467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,float16,0,0.010863999525705973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,fp8,0,0.010581333190202713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,fp8,0,0.010480000327030817
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,float16,0,0.010656000425418219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,fp8,0,0.010858666151762009
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,float16,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,fp8,0,0.009189333145817121
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,float16,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,float16,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,fp8,0,0.00877333308259646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,float16,0,0.00879466657837232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,float16,0,0.021066665649414062
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,fp8,0,0.008762666955590248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,float16,0,0.021082667013009388
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,float16,0,0.008821333448092142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,fp8,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,float16,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,float16,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,fp8,0,0.008821333448092142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,float16,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,fp8,0,0.008832000195980072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,fp8,0,0.008746666833758354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,float16,0,0.09698133667310078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,float16,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,float16,0,0.10003200173377991
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,fp8,0,0.008837333569924036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,fp8,0,0.09095999598503113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,float16,0,0.09712533156077068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,fp8,0,0.008629333227872849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,fp8,0,0.09001599748929341
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,float16,0,0.05392533540725708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,fp8,0,0.04818666477998098
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,float16,0,0.05301866432030996
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,float16,0,0.096778670946757
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,fp8,0,0.08885866403579712
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,fp8,0,0.04858666658401489
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,float16,0,0.0543093333641688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,float16,0,0.05389333268006643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,fp8,0,0.0489333321650823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,fp8,0,0.04994133114814758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,float16,0,0.054005334774653115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,fp8,0,0.04987200101216634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,float16,0,0.03120533376932144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,fp8,0,0.027269333600997925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,float16,0,0.031471999982992806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,fp8,0,0.028010666370391846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,fp8,0,0.027317332724730175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,fp8,0,0.027327999472618103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,float16,0,0.03142933299144109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,float16,0,0.019130667050679524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,fp8,0,0.018320000420014065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,float16,0,0.019178666174411774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,fp8,0,0.019194666296243668
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,fp8,0,0.018853332847356796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,float16,0,0.019152000546455383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,fp8,0,0.018874666343132656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,float16,0,0.019152000546455383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,fp8,0,0.01821333294113477
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,float16,0,0.012997332960367203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,float16,0,0.012938667088747025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,fp8,0,0.012725333372751871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,float16,0,0.013077333569526672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,float16,0,0.012949333836634954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,fp8,0,0.09076266487439473
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,fp8,0,0.02737066646416982
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,float16,0,0.013045333325862885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,float16,0,0.011061333119869232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,float16,0,0.030917334059874218
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,float16,0,0.0107893335322539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,float16,0,0.010661333799362183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,float16,0,0.010656000425418219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,float16,0,0.010863999525705973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,float16,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,float16,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,fp8,0,0.008650666723648706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,float16,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,float16,0,0.020106667031844456
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,float16,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,float16,0,0.008767999708652496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,float16,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,float16,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,float16,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,fp8,0,0.009061333412925402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,float16,0,0.008746666833758354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,float16,0,0.008752000207702318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,float16,0,0.00871999996403853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,float16,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,fp8,0,0.009279999881982803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,float16,0,0.008752000207702318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,float16,0,0.03126933425664902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,float16,0,0.0953386624654134
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,float16,0,0.09802666306495667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,fp8,0,0.08850666880607605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,float16,0,0.09603200356165568
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,fp8,0,0.08654933174451192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,float16,0,0.09713600079218547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,fp8,0,0.008725333337982496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,float16,0,0.05398400127887726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,fp8,0,0.04771199822425842
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,fp8,0,0.04786666731039683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,float16,0,0.05356800059477488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,fp8,0,0.0476800004641215
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,float16,0,0.05393599967161814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,fp8,0,0.04761599997679392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,float16,0,0.052613332867622375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,fp8,0,0.04781333108743032
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,float16,0,0.029504001140594482
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,float16,0,0.03013866643110911
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,fp8,0,0.027317332724730175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,fp8,0,0.027402666707833607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,fp8,0,0.08842133482297261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,fp8,0,0.027119999130566914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,float16,0,0.029253333806991577
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,fp8,0,0.02769600103298823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,float16,0,0.029616000751654308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,float16,0,0.01922133316596349
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,fp8,0,0.027269333600997925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,float16,0,0.019306667149066925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,fp8,0,0.019173332800467808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,float16,0,0.019226666539907455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,fp8,0,0.018965333700180054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,float16,0,0.019146667172511418
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,fp8,0,0.019146667172511418
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,float16,0,0.019189332922299702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,float16,0,0.0554720014333725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,float16,0,0.01301866645614306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,float16,0,0.012975999464591345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,fp8,0,0.012741333494583765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,float16,0,0.013066666821638743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,float16,0,0.012869333227475485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,float16,0,0.013050666699806849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,float16,0,0.010954666882753372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,float16,0,0.010944000134865442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,fp8,0,0.009141333401203156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,float16,0,0.010853332777818045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,float16,0,0.031541332602500916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,float16,0,0.010938666760921478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,fp8,0,0.009103999783595404
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,float16,0,0.008736000085870424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,fp8,0,0.00877333308259646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,float16,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,fp8,0,0.008672000219424566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,float16,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,fp8,0,0.08779199918111165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,float16,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,float16,0,0.008778666456540426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,float16,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,float16,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,fp8,0,0.008687999720374743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,fp8,0,0.00874133345981439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,float16,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,fp8,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,float16,0,0.00860799973209699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,float16,0,0.008576000109314919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,fp8,0,0.008714666590094566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,fp8,0,0.008687999720374743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,float16,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,0,0.010666667173306147
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,float16,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,0,0.011407999942700068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,0,0.01674666628241539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,0,0.014752000570297241
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,0,0.027066667874654133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,0,0.023071999351183575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,fp8,0,0.008570666735370954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,float16,0,0.008623999853928884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,float16,0,0.02734400083621343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,0,0.008746666833758354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,float16,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,0,0.01192533348997434
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,0,0.01711999997496605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,0,0.014709333578745524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,float16,0,0.017221332838137943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,0,0.009882666791478792
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,0,0.00842666688064734
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,0,0.0069759997228781385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,0,0.011349332829316458
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,float16,0,0.012586666891972223
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,0,0.007018666714429855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,0,0.006895999734600385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,0,0.006655999769767125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,0,0.00877333308259646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,0,0.0069759997228781385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,0,0.008650666723648706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,0,0.00701333334048589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,float16,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,fp8,0,0.008650666723648706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,fp8,0,0.023029332359631855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,0,0.0069866664707660675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,0,0.007045333584149678
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,0,0.008538666491707167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,0,0.006837333242098491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,0,0.006895999734600385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,0,0.006682666639486949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,0,0.006858666737874349
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,0,0.007141333073377609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,0,0.006927999978264173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,fp8,0,0.006927999978264173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,float16,0,0.008458666503429413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,0,0.006784000123540561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,0,0.00684799998998642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,0,0.0068853336075941724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,0,0.006805333619316419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,0,0.006864000111818314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,fp8,0,0.006688000013430913
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,0,0.006591999903321266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,0,0.006784000123540561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,0,0.006842666616042455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,0,0.00679466687142849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,float16,0,0.0068853336075941724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,0,0.00679466687142849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,fp8,0,0.0069226666043202085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,0,0.00666133314371109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,0,0.007029333462317784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,0,0.006789333497484525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,0,0.006645333642760913
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,0,0.006725333631038666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,0,0.008298666526873907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,0,0.006693333387374878
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,float16,0,0.006800000245372455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,fp8,0,0.006906666482488315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,0,0.006981333096822103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,0,0.006789333497484525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,0,0.0068693334857622785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,0,0.00679466687142849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,float16,0,0.006927999978264173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,0,0.00690133310854435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,0,0.00749333327015241
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,0,0.006864000111818314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,fp8,0,3.8775307337443032
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,float16,0,4.198330561319987
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,fp8,0,3.87334410349528
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,float16,0,4.198896090189616
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,float16,0,4.20303471883138
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,0,0.006895999734600385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,0,0.006837333242098491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,fp8,0,2.0662399927775064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,float16,0,2.169312000274658
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,fp8,0,2.1244160334269204
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,float16,0,2.677861213684082
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,float16,0,2.2863146464029946
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,fp8,0,1.995349407196045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,fp8,0,3.891119956970215
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,float16,0,2.1652746200561523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,fp8,0,1.122501293818156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,float16,0,1.2205813725789387
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,float16,0,1.5358880360921223
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,float16,0,1.1481173038482666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,fp8,0,1.9962506294250488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,fp8,0,1.1235840320587158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,float16,0,1.3680853843688965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,float16,0,0.7367786566416422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,fp8,0,0.6224213441212972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,fp8,0,1.1205066839853923
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,float16,0,0.6812373002370199
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,float16,0,0.6446880102157593
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,fp8,0,0.6234133243560791
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,float16,0,0.6409866809844971
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,fp8,0,0.6229333480199178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,float16,0,2.4424427350362143
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,fp8,0,2.3518932660420737
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,float16,0,2.815583864847819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,fp8,0,2.253546714782715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,float16,0,2.441626707712809
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,fp8,0,1.2475732962290447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,fp8,0,1.2487680117289226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,float16,0,1.6032907168070476
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,float16,0,1.2718026638031006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,fp8,0,1.0591039657592773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,fp8,0,1.2239306767781575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,float16,0,1.2768853505452473
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,fp8,0,1.342565377553304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,float16,0,0.6880266666412354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,fp8,0,0.6757706801096598
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,fp8,0,0.7216160297393799
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,float16,0,0.7308693726857504
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,fp8,0,0.6731626987457275
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,float16,0,0.8308373292287191
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,float16,0,0.6887573401133219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,fp8,0,0.6736799875895182
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,float16,0,0.39687466621398926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,fp8,0,0.3626240094502767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,float16,0,0.45073068141937256
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,fp8,0,0.3863573471705119
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,float16,0,0.44476266702016193
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,fp8,0,0.38579734166463214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,fp8,0,0.3914666573206584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,float16,0,0.4026399850845337
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,fp8,0,2.4047199885050454
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,float16,0,1.7387199401855469
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,fp8,0,1.718938668568929
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,float16,0,1.8126452763875325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,float16,0,1.2737013498942058
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,float16,0,1.8865706125895183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,float16,0,0.916266679763794
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,fp8,0,1.7162933349609375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,float16,0,1.0045973459879558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,fp8,0,0.8991413116455078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,float16,0,1.0923733711242676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,fp8,0,0.8991040388743082
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,float16,0,0.9927840232849121
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,float16,0,0.5499573151270548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,fp8,0,0.6739520231882731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,float16,0,0.5764106512069702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,fp8,0,0.4920106728871663
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,fp8,0,0.5102453231811523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,float16,0,0.5364213387171427
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,fp8,0,0.4898560047149658
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,float16,0,0.5430239836374918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,fp8,0,0.5073493321736654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,float16,0,0.32018133004506427
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,float16,0,0.3164586623509725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,fp8,0,0.26898133754730225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,fp8,0,0.2930346727371216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,float16,0,0.3280319968859355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,fp8,0,0.2871946692466736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,float16,0,0.2925493319829305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,fp8,0,0.2914399902025859
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,fp8,0,1.74236265818278
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,fp8,0,0.8997440338134766
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,float16,0,2.3843626976013184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,fp8,0,2.075119972229004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,float16,0,2.2389440536499023
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,fp8,0,2.2314987182617188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,fp8,0,2.0796586672465005
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,float16,0,2.370570659637451
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,float16,0,1.159823973973592
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,float16,0,1.1537013053894043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,fp8,0,1.097055991490682
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,float16,0,1.1596852938334148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,fp8,0,1.1457013289133708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,float16,0,1.4316320419311523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,fp8,0,1.070357322692871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,float16,0,0.6158933242162069
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,fp8,0,0.5903893311818441
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,fp8,0,0.6059893369674683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,float16,0,0.7396533489227295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,float16,0,0.6133333444595337
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,fp8,0,0.5802719990412394
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,float16,0,0.34574933846791583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,float16,0,0.3410400152206421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,fp8,0,0.3397866487503052
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,float16,0,0.38948265711466473
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,fp8,0,0.3141973416010539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,float16,0,0.34306665261586505
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,fp8,0,0.33847467104593915
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,float16,0,0.20830933252970377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,float16,0,0.22605333725611368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,fp8,0,0.1888586680094401
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,fp8,0,0.20226667324701944
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,float16,0,0.22500266631444296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,fp8,0,0.2025760014851888
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,fp8,0,0.8995680014292399
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,float16,0,0.21859200795491537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,fp8,0,0.20213866233825684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,fp8,0,1.0696533521016438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,fp8,0,1.315930684407552
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,float16,0,1.3257333437601726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,float16,0,0.7211946646372477
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,fp8,0,1.3191306591033936
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,float16,0,1.327397346496582
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,fp8,0,0.6050719817479452
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,float16,0,0.6906987031300863
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,float16,0,0.6951200167338053
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,fp8,0,0.643946647644043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,fp8,0,1.3181920051574707
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,fp8,0,0.68559463818868
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,fp8,0,0.3470453421274821
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,float16,0,0.7237493197123209
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,fp8,0,0.6415253480275472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,float16,0,0.6913546721140543
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,fp8,0,0.6425919930140177
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,float16,0,0.37726398309071857
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,float16,0,0.3800746599833171
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,fp8,0,0.37107733885447186
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,float16,0,0.3754133383433024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,fp8,0,0.34889598687489826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,fp8,0,0.3708053429921468
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,float16,0,0.4007413387298584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,fp8,0,0.3492213487625122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,float16,0,0.22350400686264038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,fp8,0,0.21420800685882568
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,float16,0,0.21782932678858438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,fp8,0,0.21358933051427206
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,float16,0,0.2181546688079834
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,fp8,0,0.21030400196711221
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,float16,0,0.13893333077430725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,fp8,0,0.12661866346995035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,fp8,0,0.13741333285967508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,float16,0,0.151146670182546
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,fp8,0,0.12761599818865457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,fp8,0,0.12776000301043192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,float16,0,1.324730634689331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,float16,0,1.5334240595499675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,fp8,0,1.2530240217844646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,float16,0,1.3709012667338054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,fp8,0,1.1749760309855144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,fp8,0,1.2540266513824463
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,float16,0,1.4793334007263184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,float16,0,0.6573973496754965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,float16,0,0.23147199551264444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,fp8,0,0.21477866172790527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,float16,0,0.7472373644510905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,fp8,0,0.6096106767654419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,fp8,0,0.6473866701126099
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,float16,0,0.15085867047309875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,float16,0,0.6865333716074625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,fp8,0,0.6326560179392496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,float16,0,0.13833600282669067
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,fp8,0,0.6074719826380411
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,float16,0,0.6552426815032959
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,float16,0,0.394538680712382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,fp8,0,0.34432001908620197
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,fp8,0,0.3436959981918335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,float16,0,0.34994665781656903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,float16,0,0.402672012646993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,fp8,0,0.343450665473938
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,float16,0,0.38333332538604736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,fp8,0,0.344810684521993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,fp8,0,0.1935946742693583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,float16,0,0.2193173368771871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,float16,0,0.19793599843978882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,float16,0,0.20882666110992432
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,fp8,0,0.18225600322087607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,fp8,0,0.19183466831843057
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,float16,0,0.2106026609738668
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,fp8,0,0.19405333201090494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,float16,0,0.1258080005645752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,fp8,0,0.10789333780606587
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,fp8,0,0.11489066481590271
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,float16,0,0.11913599570592244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,fp8,0,0.11568533380826314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,float16,0,0.11788266897201538
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,float16,0,0.08449600140253703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,float16,0,0.07902400195598602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,float16,0,0.07791466514269511
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,fp8,0,0.07654400169849396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,float16,0,0.07828266421953838
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,fp8,0,0.07734400033950806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,float16,0,0.7665013472239176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,float16,0,0.7673973242441813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,fp8,0,0.7655946413675944
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,float16,0,0.8171947002410889
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,fp8,0,0.7213013172149658
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,float16,0,0.406826655069987
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,fp8,0,0.377461314201355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,float16,0,0.12665067116419473
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,fp8,0,0.11540800333023071
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,fp8,0,0.3991306622823079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,float16,0,0.4010293483734131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,fp8,0,0.07682133217652638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,fp8,0,0.39416531721750897
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,float16,0,0.4249546527862549
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,float16,0,0.22616000970204672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,fp8,0,0.2188160022099813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,float16,0,0.22215465704600015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,fp8,0,0.21719467639923096
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,float16,0,0.21912533044815063
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,fp8,0,0.2173653244972229
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,float16,0,0.2186560034751892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,fp8,0,0.7585226694742838
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,float16,0,0.13117866714795431
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,fp8,0,0.12680533528327942
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,fp8,0,0.12567999958992004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,float16,0,0.13583999872207642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,fp8,0,0.12494933605194092
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,float16,0,0.1379680037498474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,fp8,0,0.1234879990418752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,float16,0,0.08204799890518188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,fp8,0,0.07955199976762135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,float16,0,0.08682133754094441
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,fp8,0,0.08066666622956593
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,float16,0,0.08643200000127156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,float16,0,0.08679466446240743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,fp8,0,0.07856533428033192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,fp8,0,0.04775999983151754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,float16,0,0.04810666541258494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,float16,0,0.4631839990615845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,float16,0,0.05339199801286062
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,fp8,0,0.07677866518497467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,fp8,0,0.04776533444722494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,float16,0,0.04864533245563507
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,fp8,0,0.04763199885686239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,float16,0,0.051957334081331887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,fp8,0,0.0479360024134318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,fp8,0,0.4012480179468791
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,fp8,0,0.7521546681722006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,fp8,0,0.20410666863123575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,float16,0,0.768501361211141
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,float16,0,0.8040640354156494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,fp8,0,0.7591093381245931
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,float16,0,0.40296534697214764
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,fp8,0,0.7630506356557211
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,fp8,0,0.37860266367594403
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,float16,0,0.4479200045267741
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,fp8,0,0.37355732917785645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,fp8,0,0.07880533238252004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,float16,0,0.44647467136383057
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,fp8,0,0.3940426508585612
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,float16,0,0.3980799913406372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,float16,0,0.21576533714930216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,float16,0,0.2126986583073934
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,fp8,0,0.21169066429138184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,fp8,0,0.2137440045674642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,float16,0,0.23619733254114786
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,fp8,0,0.19892799854278564
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,float16,0,0.21383466323216757
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,float16,0,0.1304800013701121
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,fp8,0,0.11426666378974915
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,float16,0,0.13057066996892294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,fp8,0,0.11998933553695679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,float16,0,0.1283466617266337
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,fp8,0,0.11274666587511699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,float16,0,0.1258026659488678
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,float16,0,0.11998933553695679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,fp8,0,0.11565333604812622
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,float16,0,0.07698133091131847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,fp8,0,0.06674666702747345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,fp8,0,0.0713973343372345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,float16,0,0.07715733349323273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,fp8,0,0.07110933462778728
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,float16,0,0.07891733447710673
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,fp8,0,0.06620266536871593
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,float16,0,0.052671998739242554
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,fp8,0,0.047557334105173744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,float16,0,0.048138668139775596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,float16,0,0.051781331499417625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,fp8,0,0.04757333298524221
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,float16,0,0.04783466458320618
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,fp8,0,0.047824000318845115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,float16,0,0.03760000069936117
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,fp8,0,0.033514666060606636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,float16,0,0.039264000952243805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,fp8,0,0.0351946676770846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,float16,0,0.035487999518712364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,fp8,0,0.03530666728814443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,float16,0,0.03750933210055033
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,fp8,0,0.03329066683848699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,fp8,0,0.2102773388226827
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,float16,0,0.4859466552734375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,fp8,0,0.4846133391062419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,float16,0,0.4867946704228719
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,float16,0,0.7653066317240397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,fp8,0,0.4856906731923421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,float16,0,0.505898674329122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,float16,0,0.07434133191903432
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,fp8,0,0.4633920192718506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,fp8,0,0.2547893325487773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,fp8,0,0.24486400683720908
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,float16,0,0.2555999954541524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,float16,0,0.26044267416000366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,fp8,0,0.04807466765244802
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,fp8,0,0.2412160038948059
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,float16,0,0.2549333373705546
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,float16,0,0.26902933915456134
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,float16,0,0.14677866299947104
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,fp8,0,0.2561226685841878
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,fp8,0,0.1339413324991862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,fp8,0,0.37589867909749347
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,float16,0,0.15053332845369974
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,fp8,0,0.13150933384895325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,float16,0,0.14848533272743225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,fp8,0,0.1395840048789978
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,fp8,0,0.07427200178305308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,float16,0,0.08990933497746785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,fp8,0,0.08026133477687836
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,fp8,0,0.0790773332118988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,float16,0,0.0807360013326009
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,float16,0,0.051856001218159996
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,fp8,0,0.04761599997679392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,fp8,0,0.04994666576385498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,float16,0,0.05444799860318502
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,fp8,0,0.04984533290068308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,float16,0,0.054144000013669334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,fp8,0,0.04964800179004669
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,fp8,0,0.029450667401154835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,float16,0,0.031141333281993866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,float16,0,0.033520000676314034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,fp8,0,0.031221332649389904
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,float16,0,0.03350933392842611
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,fp8,0,0.029893333713213604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,float16,0,0.03359466542800268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,fp8,0,0.030597334106763203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,float16,0,0.029317334294319153
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,float16,0,0.1425493359565735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,fp8,0,0.027141332626342773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,fp8,0,0.029418667157491047
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,float16,0,0.03138133386770884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,float16,0,0.08473599950472514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,fp8,0,0.029114666084448498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,float16,0,0.031354665756225586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,fp8,0,0.07823466757933299
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,float16,0,0.05414933462937673
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,float16,0,0.5590293407440186
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,fp8,0,0.5108426809310913
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,float16,0,0.5194133520126343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,fp8,0,0.4978026549021403
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,float16,0,0.5685226519902548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,fp8,0,0.49905598163604736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,float16,0,0.275434672832489
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,fp8,0,0.1407786707083384
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,float16,0,0.031514666974544525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,float16,0,0.08816533287366231
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,fp8,0,0.26849599679311115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,float16,0,0.2871840000152588
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,fp8,0,0.27060800790786743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,float16,0,0.26872533559799194
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,fp8,0,0.2665333350499471
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,fp8,0,0.14473066727320352
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,fp8,0,0.25806933641433716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,fp8,0,0.13784000277519226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,float16,0,0.15267200271288553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,fp8,0,0.1455466647942861
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,float16,0,0.1541813313961029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,fp8,0,0.1434453328450521
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,float16,0,0.0839359958966573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,fp8,0,0.08330133557319641
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,fp8,0,0.08104533453782399
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,float16,0,0.08803199728329976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,fp8,0,0.08049599826335907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,float16,0,0.08886399865150452
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,fp8,0,0.07627733548482259
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,float16,0,0.051141331593195595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,fp8,0,0.029391999046007793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,float16,0,0.04925866425037384
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,fp8,0,0.04914666712284088
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,fp8,0,0.04655999938646952
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,float16,0,0.049498667319615684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,fp8,0,0.04644800225893656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,float16,0,0.14813333749771118
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,float16,0,0.27161065737406415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,float16,0,0.03401600072781245
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,float16,0,0.033813332517941795
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,fp8,0,0.03178666780392329
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,float16,0,0.033589333295822144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,fp8,0,0.033610666791598
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,fp8,0,0.03151999910672506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,float16,0,0.03389866650104523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,fp8,0,0.022869333624839783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,float16,0,0.025253333151340485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,fp8,0,0.0229066660006841
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,float16,0,0.025045332809289295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,fp8,0,0.022805333137512207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,float16,0,0.02325333406527837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,fp8,0,0.022970666488011677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,fp8,0,0.022133332987626392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,float16,0,0.02309333284695943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,float16,0,0.023215999205907185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,fp8,0,0.02330133318901062
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,float16,0,0.02319466571013133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,fp8,0,0.02295999974012375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,float16,0,0.023210667073726654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,float16,0,0.0518453319867452
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,fp8,0,0.022986667851607006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,float16,0,0.1549493372440338
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,float16,0,0.3938773473103841
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,fp8,0,0.03166399896144867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,fp8,0,0.39156798521677655
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,float16,0,0.4058239857355754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,float16,0,0.02533866713444392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,float16,0,0.08372799555460612
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,fp8,0,0.3835893472035726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,float16,0,0.21145067612330118
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,float16,0,0.2099413275718689
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,fp8,0,0.2031466762224833
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,fp8,0,0.2004800041516622
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,float16,0,0.3957546552022298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,float16,0,0.20614399512608847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,fp8,0,0.049925332268079124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,fp8,0,0.3843093315760295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,fp8,0,0.2036906679471334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,float16,0,0.11888532837231953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,fp8,0,0.11086933811505635
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,fp8,0,0.20244266589482626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,float16,0,0.11713600158691406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,float16,0,0.2115466594696045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,fp8,0,0.10699199636777242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,fp8,0,0.06364266574382782
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,float16,0,0.1113973359266917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,fp8,0,0.060378665725390114
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,float16,0,0.06592533489068349
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,float16,0,0.06628799935181935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,float16,0,0.0641546646753947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,fp8,0,0.06247466802597046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,float16,0,0.11641599734624226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,fp8,0,0.11172266801198323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,float16,0,0.038719999293486275
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,float16,0,0.03967999915281931
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,fp8,0,0.037578667203585304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,float16,0,0.039306665460268654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,fp8,0,0.0352960005402565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,float16,0,0.03932799895604452
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,fp8,0,0.03700266778469086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,float16,0,0.026848000784715016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,fp8,0,0.02693866689999898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,float16,0,0.027376001079877216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,fp8,0,0.025050667424996693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,float16,0,0.026730666557947796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,fp8,0,0.025258667767047882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,float16,0,0.026346666117509205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,fp8,0,0.025301332275072735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,float16,0,0.017909333109855652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,fp8,0,0.01738133281469345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,float16,0,0.019039999693632126
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,fp8,0,0.017082666357358296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,float16,0,0.01889066646496455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,fp8,0,0.10977600018183391
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,float16,0,0.01762666677435239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,float16,0,0.016890666137139004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,fp8,0,0.017082666357358296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,float16,0,0.017050666113694508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,float16,0,0.017370666066805523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,float16,0,0.06457066535949707
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,float16,0,0.01717866708834966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,fp8,0,0.017029333859682083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,fp8,0,0.0376800000667572
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,float16,0,0.017136000096797943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,fp8,0,0.017375999440749485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,float16,0,0.017210666090250015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,float16,0,0.01701333373785019
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,fp8,0,0.16830933094024658
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,float16,0,0.1721013387044271
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,float16,0,0.17186667521794638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,fp8,0,0.0620000014702479
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,fp8,0,0.16709333658218384
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,float16,0,0.16969066858291626
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,float16,0,0.017157333592573803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,fp8,0,0.16658133268356323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,float16,0,0.09735467036565144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,fp8,0,0.09212799866994222
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,fp8,0,0.09099200367927551
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,float16,0,0.09634133179982503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,float16,0,0.09667733311653137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,float16,0,0.09633599718411763
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,fp8,0,0.05269333223501841
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,fp8,0,0.052799999713897705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,fp8,0,0.09312533338864644
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,fp8,0,0.05341866612434387
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,float16,0,0.05585599939028422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,fp8,0,0.05259733398755392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,float16,0,0.05715733269850413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,float16,0,0.054154664278030396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,float16,0,0.031471999982992806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,float16,0,0.03306666761636734
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,fp8,0,0.030154667794704437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,fp8,0,0.02975466599067052
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,float16,0,0.053946668903032936
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,fp8,0,0.03146133323510488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,fp8,0,0.03141866624355316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,float16,0,0.023205332458019257
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,fp8,0,0.02402133246262868
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,float16,0,0.02330133318901062
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,fp8,0,0.02292799949645996
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,float16,0,0.023402666052182514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,fp8,0,0.02329600105683009
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,float16,0,0.023232000569502514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,fp8,0,0.02332266668478648
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,float16,0,0.01509333277742068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,float16,0,0.015061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,float16,0,0.015024000157912573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,fp8,0,0.09079466263453166
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,float16,0,0.015109332899252573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,fp8,0,0.015173333386580149
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,float16,0,0.014741333822409311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,float16,0,0.015306666493415833
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,float16,0,0.014837333311637243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,fp8,0,0.015013333410024643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,float16,0,0.014741333822409311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,float16,0,0.01526933287580808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,fp8,0,0.014794666320085526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,float16,0,0.014725333700577417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,float16,0,0.01505600040157636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,fp8,0,0.014970666418472925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,fp8,0,0.014671999961137772
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,float16,0,0.014117332796255747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,float16,0,0.014842666685581207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,fp8,0,0.014975999792416891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,float16,0,0.015098666151364645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,float16,0,0.015184000134468079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,float16,0,0.10857599973678589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,fp8,0,0.10573333501815796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,float16,0,0.10814932982126872
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,fp8,0,0.1053600013256073
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,float16,0,0.1086133321126302
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,float16,0,0.06233599781990051
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,fp8,0,0.10586133599281311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,float16,0,0.03160000095764796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,fp8,0,0.05824000140031179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,float16,0,0.061834668119748436
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,fp8,0,0.0581279993057251
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,float16,0,0.06182399888833364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,fp8,0,0.05775466561317444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,float16,0,0.06006933252016703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,fp8,0,0.05792533357938131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,fp8,0,0.03363733241955439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,float16,0,0.036271999279658
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,float16,0,0.03530666728814443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,fp8,0,0.03380800038576126
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,float16,0,0.035530666510264076
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,fp8,0,0.03446399917205175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,float16,0,0.035445332527160645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,float16,0,0.021183999876181286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,float16,0,0.02199999988079071
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,fp8,0,0.035504000882307686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,fp8,0,0.020975999534130096
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,float16,0,0.02197866638501485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,float16,0,0.021216000119845074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,fp8,0,0.01575999955336253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,float16,0,0.01661866654952367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,float16,0,0.01706133286158244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,fp8,0,0.016890666137139004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,float16,0,0.01703466723362605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,fp8,0,0.01617066686352094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,float16,0,0.016832000265518825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,float16,0,0.03150933235883713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,float16,0,0.012597333639860153
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,float16,0,0.011098666737476984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,fp8,0,0.010863999525705973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,float16,0,0.012714666624863943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,fp8,0,0.012186666329701742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,float16,0,0.011445333560307821
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,float16,0,0.01525866612792015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,float16,0,0.01091733326514562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,float16,0,0.011194666226704916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,fp8,0,0.010869332899649939
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,float16,0,0.011087999989589056
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,float16,0,0.011322667201360067
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,float16,0,0.01081066702802976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,float16,0,0.011007999380429586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,float16,0,0.011055999745925268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,float16,0,0.011066666493813196
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,fp8,0,0.010826667149861654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,float16,0,0.011077333241701126
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,float16,0,0.010949333508809408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,fp8,0,0.021146667500336964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,float16,0,0.010938666760921478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,float16,0,0.010645333677530289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,fp8,0,0.021040000021457672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,fp8,0,0.016965333372354507
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,fp8,0,0.08329600095748901
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,float16,0,0.08807466427485149
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,fp8,0,0.08353066444396973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,float16,0,0.08683199683825175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,fp8,0,0.045791998505592346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,float16,0,0.049216002225875854
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,fp8,0,0.08237866560618083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,float16,0,0.047925333182017006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,fp8,0,0.04613866905371348
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,fp8,0,0.010778666784365972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,float16,0,0.049226666490236916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,float16,0,0.028064000109831493
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,fp8,0,0.04577066500981649
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,fp8,0,0.046426668763160706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,float16,0,0.04763199885686239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,fp8,0,0.027248000105222065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,fp8,0,0.027327999472618103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,float16,0,0.029440000653266907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,fp8,0,0.02743999908367793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,float16,0,0.01883200059334437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,float16,0,0.018735999862353008
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,float16,0,0.018826667219400406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,fp8,0,0.017082666357358296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,float16,0,0.017477333545684814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,fp8,0,0.017338667064905167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,float16,0,0.08477333188056946
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,float16,0,0.01481066644191742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,float16,0,0.014826666563749313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,float16,0,0.014106666048367819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,fp8,0,0.01309866706530253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,float16,0,0.013082666943470636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,float16,0,0.010768000036478043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,float16,0,0.010608000059922537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,fp8,0,0.010693332801262537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,float16,0,0.01081066702802976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,fp8,0,0.011744000017642975
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,float16,0,0.01055466632048289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,fp8,0,0.01080000028014183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,float16,0,0.009072000160813332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,float16,0,0.009397333487868309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,fp8,0,0.01232533281048139
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,fp8,0,0.02703999976317088
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,fp8,0,0.009359999870260557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,float16,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,fp8,0,0.008623999853928884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,float16,0,0.009098666409651438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,float16,0,0.009989333028594652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,float16,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,float16,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,float16,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,float16,0,0.0772266685962677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,fp8,0,0.07253866891066234
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,float16,0,0.07649066547552745
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,float16,0,0.028762665887673695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,fp8,0,0.07243200143178304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,float16,0,0.07618666688601176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,float16,0,0.029365333418051403
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,float16,0,0.042352000872294106
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,fp8,0,0.03976533313592275
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,fp8,0,0.04106133431196213
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,float16,0,0.04366933306058248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,fp8,0,0.04022933294375738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,float16,0,0.04154666761557261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,fp8,0,0.041306667029857635
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,float16,0,0.025781333446502686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,float16,0,0.025439999997615814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,fp8,0,0.02498133232196172
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,fp8,0,0.025199999411900837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,float16,0,0.025216000775496166
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,fp8,0,0.02532266577084859
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,float16,0,0.02515733242034912
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,fp8,0,0.025114665428797405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,fp8,0,0.01605333387851715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,float16,0,0.017077332983414333
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,fp8,0,0.016842667013406754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,float16,0,0.016842667013406754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,fp8,0,0.016869333883126576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,float16,0,0.012981332838535309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,float16,0,0.012837332983811697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,float16,0,0.013002666334311167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,float16,0,0.012736000120639801
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,float16,0,0.04343999922275543
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,float16,0,0.010319999729593595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,float16,0,0.01102399950226148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,float16,0,0.010101333260536194
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,fp8,0,0.01003200002014637
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,fp8,0,0.009253333633144697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,float16,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,fp8,0,0.008623999853928884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,float16,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,float16,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,float16,0,0.016906666258970898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,float16,0,0.017029333859682083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,float16,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,float16,0,0.010133333504199982
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,fp8,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,float16,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,fp8,0,0.07211733361085255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,float16,0,0.008805333326260248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,float16,0,0.008821333448092142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,fp8,0,0.008762666955590248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,fp8,0,0.008613333106040955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,float16,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,float16,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,float16,0,0.010170666500926018
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,float16,0,0.07299200197060902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,fp8,0,0.06842666864395142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,float16,0,0.07506666580835979
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,fp8,0,0.06858666737874348
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,fp8,0,0.06631466746330261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,float16,0,0.042447999119758606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,float16,0,0.041482667128245033
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,fp8,0,0.0395413339138031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,fp8,0,0.03842666745185852
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,fp8,0,0.03758399933576584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,float16,0,0.0431573341290156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,float16,0,0.02513599892457326
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,fp8,0,0.023317334552605946
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,float16,0,0.025263999899228413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,fp8,0,0.02362666775782903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,fp8,0,0.0232640008131663
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,float16,0,0.025349333882331848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,fp8,0,0.023242667317390442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,float16,0,0.01691199963291486
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,fp8,0,0.014869333555301031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,float16,0,0.016837333639462788
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,float16,0,0.017738666385412216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,float16,0,0.01312000056107839
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,fp8,0,0.01267733300725619
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,float16,0,0.01301866645614306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,float16,0,0.0758186678091685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,float16,0,0.012736000120639801
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,float16,0,0.012885333349307379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,float16,0,0.008853333070874214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,float16,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,float16,0,0.04155199974775314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,float16,0,0.00978133330742518
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,float16,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,fp8,0,0.008837333569924036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,fp8,0,0.03941333293914795
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,float16,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,float16,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,fp8,0,0.008767999708652496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,float16,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,fp8,0,0.008645333349704742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,float16,0,0.025311999022960663
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,float16,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,float16,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,float16,0,0.009162666896979014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,float16,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,float16,0,0.01704000060757001
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,float16,0,0.008799999952316284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,fp8,0,0.00867733359336853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,0,0.010768000036478043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,0,0.009189333145817121
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,0,0.011429333438475927
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,0,0.016938666502634685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,0,0.008592000231146812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,0,0.007914666707317034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,float16,0,0.021488000949223835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,0,0.014842666685581207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,0,0.00847999999920527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,0,0.012682666381200155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,float16,0,0.014842666685581207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,float16,0,0.009125333279371262
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,0,0.00871999996403853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,0,0.006773333375652631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,float16,0,0.010938666760921478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,0,0.006720000257094701
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,0,0.0068853336075941724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,0,0.006725333631038666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,0,0.006789333497484525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,0,0.008133333176374435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,0,0.006746666505932808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,0,0.0068853336075941724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,0,0.0069333333522081375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,float16,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,0,0.006789333497484525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,0,0.006784000123540561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,0,0.0069973332186539965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,0,0.006805333619316419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,fp8,0,0.0069919998447100324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,float16,0,0.007098666702707608
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,fp8,0,0.019029332945744198
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,0,0.0068853336075941724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,0,0.0069333333522081375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,0,0.006890666360656421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,float16,0,0.007087999954819679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,fp8,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,0,0.0069440001000960665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,0,0.006858666737874349
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,0,0.006538666784763336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,fp8,0,0.006773333375652631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,float16,0,0.006911999856432279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,0,0.006837333242098491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,0,0.006762666627764702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,0,0.006560000280539195
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,0,0.006698666761318843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,0,0.006784000123540561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,float16,0,0.006650666395823161
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,fp8,0,0.00684799998998642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,0,0.006831999868154526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,0,0.006682666639486949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,0,0.007055999711155891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,0,0.006640000268816948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,0,0.0069386667261521024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,0,0.006640000268816948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,float16,0,2.9883734385172525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,fp8,0,2.581669330596924
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,float16,0,2.8138081232706704
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,fp8,0,2.5822347005208335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,0,0.007162666569153468
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,float16,0,2.811408042907715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,fp8,0,2.5828372637430825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,1.3513013521830242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,float16,0,1.6056747436523438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,float16,0,1.8123733202616374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,fp8,0,1.4369759559631348
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,fp8,0,1.3490613301595051
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,float16,0,1.6226293245951335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,float16,0,1.5717760721842449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,fp8,0,1.3496586481730144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,float16,0,0.8172640005747477
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,float16,0,1.5086132685343425
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,0,0.0069333333522081375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,0.7800587018330892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,1.4589279492696126
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,float16,0,0.9393333594004313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,fp8,0,0.7784799734751383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,float16,0,0.8008853594462076
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,fp8,0,0.7445440292358398
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,fp8,0,0.7783679962158203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,float16,0,0.8558826446533203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,float16,0,0.47252798080444336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,0.7574453353881836
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,float16,0,0.4984746774037679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,float16,0,0.8011786937713623
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,fp8,0,0.42558932304382324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,float16,0,0.5217599868774414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,fp8,0,0.46595199902852374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,float16,0,0.5121119817097982
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,0.45333866278330487
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,float16,0,0.4678560098012288
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,float16,0,1.7067626317342122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,fp8,0,1.6003200213114421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,fp8,0,1.604954719543457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,float16,0,1.7926880518595378
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,fp8,0,1.502885341644287
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,float16,0,2.0017760594685874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,float16,0,0.8709706465403239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,0.8492960135142008
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,float16,0,0.9250826835632324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,fp8,0,0.8558026949564616
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,float16,0,0.92630934715271
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,fp8,0,0.848357359568278
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,float16,0,0.8687679767608643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,fp8,0,0.826304038365682
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,0.800048033396403
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,float16,0,0.8708586692810059
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,0.47305067380269367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,float16,0,0.5154186487197876
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,fp8,0,0.4723999897638957
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,float16,0,0.5561920007069906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,fp8,0,0.4434933265050252
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,float16,0,0.48957331975301105
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,fp8,0,0.47257598241170246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,fp8,0,0.45371735095977783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,float16,0,0.4862186511357625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,float16,0,0.3242986599604289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,0.4734453360239665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,float16,0,0.2932906746864319
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,0.28193066517512005
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,fp8,0,0.28224533796310425
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,float16,0,0.3113279938697815
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,fp8,0,0.2797386646270752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,float16,0,0.317631999651591
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,fp8,0,0.28196799755096436
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,float16,0,0.2926560044288635
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,0.2816693385442098
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,float16,0,1.222101370493571
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,float16,0,1.4540054003397624
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,fp8,0,1.1455093224843342
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,float16,0,1.362053394317627
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,0.6155893405278524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,fp8,0,1.0726346969604492
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,float16,0,0.6279893318812052
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,float16,0,0.668224016825358
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,float16,0,0.5217493375142416
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,fp8,0,0.5743680000305176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,float16,0,0.7249120076497396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,fp8,0,0.5755999883015951
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,float16,0,0.7412373224894205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,float16,0,0.67303999265035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,0.42497066656748456
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,float16,0,0.3883039951324463
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,0.34855465094248456
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,0.5781439940134684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,float16,0,0.3999520142873128
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,fp8,0,0.3520266612370809
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,float16,0,0.4079200029373169
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,fp8,0,0.3482186794281006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,float16,0,0.3856319983800252
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,fp8,0,0.3367679913838704
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,float16,0,0.3579519987106323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,float16,0,0.22392000754674277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,float16,0,0.244704008102417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,fp8,0,0.20360533396402994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,0.3264799912770589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,float16,0,0.2396906614303589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,fp8,0,0.20412800709406534
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,float16,0,0.22369599342346191
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,0.2174826661745707
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,fp8,0,1.1447892983754475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,float16,0,1.576133410135905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,fp8,0,1.3631946245829265
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,float16,0,1.5757919947306316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,fp8,0,1.3645866711934407
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,fp8,0,0.6138453483581543
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,float16,0,1.485856056213379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,float16,0,0.7801067034403483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,fp8,0,1.365008036295573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,float16,0,0.9489493370056152
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,fp8,0,0.7140159606933594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,fp8,0,0.7134666442871094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,float16,0,0.9356799920399984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,float16,0,0.7775999704996744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,0.21737066904703775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,fp8,0,0.7151573499043783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,float16,0,0.426144003868103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,float16,0,0.7806506951649984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,fp8,0,0.20423465967178345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,0.7649973233540853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,0.41805867354075116
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,float16,0,0.4248799880345662
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,fp8,0,0.4170453151067098
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,float16,0,0.42525867621103924
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,fp8,0,0.4167626698811849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,float16,0,0.49280532201131183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,fp8,0,0.4179946581522624
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,float16,0,0.42787734667460126
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,float16,0,0.27354133129119873
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,0.3901280164718628
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,float16,0,0.2513333360354106
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,fp8,0,0.24422933657964072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,fp8,0,0.24340800444285074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,float16,0,0.2467306653658549
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,fp8,0,0.22608532508214316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,float16,0,0.25109867254892987
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,float16,0,0.16053332885106406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,0.14587733149528503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,float16,0,0.1745013395945231
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,fp8,0,0.1460853318373362
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,float16,0,0.16224533319473267
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,0.7662026882171631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,fp8,0,0.15666666626930237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,float16,0,0.15904000401496887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,fp8,0,0.15897066394488016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,float16,0,0.16065067052841187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,0.14617066582043967
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,float16,0,1.02347199122111
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,fp8,0,0.8561920324961344
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,float16,0,0.8774399757385254
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,float16,0,0.2393653392791748
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,fp8,0,0.8640746275583903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,fp8,0,0.8081653118133545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,float16,0,1.003168026606242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,float16,0,0.471013347307841
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,0.46173866589864093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,0.22739199797312418
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,float16,0,0.5524213314056396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,float16,0,0.5507306655248007
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,fp8,0,0.4296319882074992
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,float16,0,0.24924800793329874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,float16,0,0.46829867362976074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,0.24405866861343384
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,float16,0,0.47086934248606366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,0.45975999037424725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,0.258842666943868
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,float16,0,0.26692267258961994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,fp8,0,0.2581599950790405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,float16,0,0.2914186716079712
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,fp8,0,0.25178666909535724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,float16,0,0.29755733410517377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,fp8,0,0.2574293414751689
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,float16,0,0.28918933868408203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,0.25708266099294025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,float16,0,0.15944000085194907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,0.15421866377194723
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,fp8,0,0.15546666582425436
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,float16,0,0.1777120033899943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,float16,0,0.17231466372807822
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,fp8,0,0.14415466785430908
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,float16,0,0.1564906636873881
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,0.15360533197720846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,float16,0,0.1588213344415029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,float16,0,0.10531199971834819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,float16,0,0.11310399572054546
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,fp8,0,0.09527466694513957
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,float16,0,0.1035093367099762
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,fp8,0,0.09462400277455647
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,fp8,0,0.09463466207186381
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,fp8,0,0.4589066505432129
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,float16,0,0.10518933335940044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,float16,0,0.10347200433413188
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,0.09472533067067464
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,fp8,0,0.43299198150634766
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,float16,0,0.26447467009226483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,fp8,0,0.7546079953511556
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,float16,0,0.9942239920298258
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,float16,0,0.9947199821472168
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,fp8,0,0.7550026575724283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,float16,0,0.8749653498331705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,fp8,0,0.42283201217651367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,float16,0,0.5086666742960612
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,fp8,0,0.7564746538798014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,float16,0,0.437669316927592
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,0.426474650700887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,float16,0,0.4580426613489787
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,fp8,0,0.3970880111058553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,fp8,0,0.3978026707967122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,float16,0,0.5123946666717529
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,float16,0,0.43824533621470135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,fp8,0,0.15642133355140686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,float16,0,0.24013866980870566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,float16,0,0.26207999388376874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,0.2178986668586731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,fp8,0,0.23386667172114053
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,float16,0,0.27294933795928955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,fp8,0,0.23562665780385336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,float16,0,0.2391200065612793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,fp8,0,0.21979733308156332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,float16,0,0.2566933234532674
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,0.23442665735880533
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,float16,0,0.15324800213178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,float16,0,0.15148799618085226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,0.12634666760762533
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,fp8,0,0.13428266843159994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,float16,0,0.1537546714146932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,fp8,0,0.1363040010134379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,float16,0,0.15152532855669656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,fp8,0,0.1362986663977305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,float16,0,0.1421066621939341
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,float16,0,0.09086933732032776
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,fp8,0,0.08940266569455464
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,float16,0,0.09794666369756062
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,float16,0,0.10016533732414246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,fp8,0,0.08238400022188823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,float16,0,0.0990773340066274
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.08249600231647491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,float16,0,0.0988159974416097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,float16,0,0.06218666831652323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.0572266678015391
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,float16,0,0.06444266438484192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,fp8,0,0.056421334544817604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,float16,0,0.06248533229033152
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,float16,0,0.0620959997177124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,fp8,0,0.056128000219662987
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,0.42443732420603436
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,float16,0,0.06222933530807495
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.05605866511662801
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,0.09497599800427754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,float16,0,0.5799893140792847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,fp8,0,0.4593120018641154
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,float16,0,0.5093546708424886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,fp8,0,0.4904533227284749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,0.12710932890574136
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.0890773336092631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,float16,0,0.5445760091145834
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,fp8,0,0.0886293351650238
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,fp8,0,0.4909706513086955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,0.2469600041707357
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,float16,0,0.27196266253789264
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,fp8,0,0.26232000192006427
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,float16,0,0.31197333335876465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,fp8,0,0.26182933648427326
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,float16,0,0.26689066489537555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,fp8,0,0.2458453377087911
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,fp8,0,0.05718400080998739
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,0.2656053304672241
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,float16,0,0.2712799906730652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,0.1379680037498474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,fp8,0,0.1376159985860189
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,float16,0,0.1682186722755432
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,fp8,0,0.14814399679501852
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,float16,0,0.1504586637020111
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,fp8,0,0.15014933546384177
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,float16,0,0.15388266245524088
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,float16,0,0.0937439997990926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,float16,0,0.09083200494448344
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,0.08248533308506012
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,fp8,0,0.09075733025868733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,fp8,0,0.08266133566697438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,float16,0,0.09106133381525676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,fp8,0,0.08963200449943542
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,float16,0,0.09385066231091817
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,0.08273600041866302
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,float16,0,0.060090666015942894
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,float16,0,0.06651733318964641
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,fp8,0,0.05569600065549215
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,float16,0,0.0650186687707901
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,fp8,0,0.05593066910902659
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,float16,0,0.26953067382176715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,float16,0,0.06060799956321716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,fp8,0,0.05517866710821787
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.05607999861240387
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,float16,0,0.04580800235271454
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,float16,0,0.045647998650868736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,float16,0,0.15316800276438394
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.04141866664091746
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,float16,0,0.16211199760437012
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,fp8,0,0.04171733558177948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,float16,0,0.049839998284975685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,fp8,0,0.041696002086003624
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,float16,0,0.04577066500981649
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,fp8,0,0.041759997606277466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,float16,0,0.045781334241231285
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.04165866722663244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,0.1476906637350718
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,float16,0,0.10058666268984477
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,float16,0,0.514298677444458
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,float16,0,0.5143786668777466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,float16,0,0.5148640076319376
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,fp8,0,0.47645334402720135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,fp8,0,0.45154134432474774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,float16,0,0.2634720007578532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.055125330885251365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,float16,0,0.2929866711298625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,float16,0,0.25878934065500897
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,float16,0,0.2733653386433919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,0.2513386607170105
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,fp8,0,0.2533386747042338
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,fp8,0,0.2364906668663025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,float16,0,0.061343997716903687
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,float16,0,0.27638399600982666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,float16,0,0.14672533671061197
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,0.14056533575057983
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,float16,0,0.1471733351548513
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,0.253221333026886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,fp8,0,0.1393280029296875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,float16,0,0.15806399782498678
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,float16,0,0.15482133626937866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,fp8,0,0.13965333501497904
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,fp8,0,0.14030399918556213
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,float16,0,0.15811199943224588
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,0.14035733540852866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,0.0804906686147054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,float16,0,0.086517333984375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,fp8,0,0.0766186664501826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,float16,0,0.09320533275604248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,float16,0,0.09327466289202373
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,fp8,0,0.08147199948628743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,float16,0,0.09108799695968628
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,0.0809440016746521
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,float16,0,0.08657067020734151
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,fp8,0,0.44994131724039715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,float16,0,0.05858133236567179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.05320533116658529
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,fp8,0,0.05000533163547516
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,float16,0,0.054272000988324486
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,float16,0,0.0581279993057251
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,float16,0,0.059402664502461754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,fp8,0,0.049914668003718056
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,float16,0,0.054058666030565895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.05266666909058889
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,float16,0,0.03326933334271113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.03156266609827677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,float16,0,0.033471999069054924
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,fp8,0,0.03350933392842611
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,float16,0,0.03748800108830134
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,float16,0,0.03326933334271113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,fp8,0,0.033488000432650246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,float16,0,0.034304000437259674
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,fp8,0,0.2366080085436503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,float16,0,0.03561066587766012
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,float16,0,0.035349334279696144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.033071999748547874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,fp8,0,0.03278400003910065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,float16,0,0.03521066655715307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,fp8,0,0.03323200096686681
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,float16,0,0.035258665680885315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,fp8,0,0.031957333286603294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.0322773332397143
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,float16,0,0.03319466610749563
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,float16,0,0.3269866704940796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,fp8,0,0.29993067185084027
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,fp8,0,0.08206933240095775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,float16,0,0.34012266000111896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,fp8,0,0.2850293318430583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,fp8,0,0.05189333359400431
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,float16,0,0.33478399117787677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,fp8,0,0.300053338209788
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,float16,0,0.17014400164286295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,float16,0,0.18392000595728555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,fp8,0,0.16103999813397726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,fp8,0,0.03322133421897888
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,float16,0,0.17650665839513144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,0.15442132949829102
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,float16,0,0.1805973251660665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,fp8,0,0.16288000345230103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.03123733401298523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,fp8,0,0.15319466590881348
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,0.15527466932932535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,0.0879306693871816
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,float16,0,0.10380267103513081
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,float16,0,0.170250674088796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,fp8,0,0.0855466624101003
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,float16,0,0.09384533762931824
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,float16,0,0.10204266508420308
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,fp8,0,0.09155733386675517
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,0.08676266670227051
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,float16,0,0.09732266267140706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,float16,0,0.0610453337430954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,float16,0,0.05978133281071981
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.05188799897829691
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,fp8,0,0.055776000022888184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,float16,0,0.06229333579540253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,fp8,0,0.05562133093674978
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,fp8,0,0.05566933254400889
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,float16,0,0.06311466793219249
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.05161599814891815
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,float16,0,0.04072533299525579
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.03737599899371465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,fp8,0,0.03753600021203359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,float16,0,0.040821333726247154
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,fp8,0,0.03493333359559377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,float16,0,0.04141866664091746
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,fp8,0,0.036917333801587425
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.03737599899371465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,float16,0,0.03800000001986822
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,float16,0,0.027248000105222065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,float16,0,0.029487999776999157
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,fp8,0,0.027093333502610523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,float16,0,0.029690665503342945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,fp8,0,0.02716800073782603
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,float16,0,0.027402666707833607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,fp8,0,0.02733866622050603
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,float16,0,0.027424000203609467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.02536533276240031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,float16,0,0.027280000348885853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.027077332139015198
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,float16,0,0.029114666084448498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,fp8,0,0.0272533322374026
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,float16,0,0.028768000503381092
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,fp8,0,0.027210667729377747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,float16,0,0.029130667448043823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,float16,0,0.09755200147628784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,fp8,0,0.08540266752243042
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.027290667096773785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,float16,0,0.0620000014702479
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,fp8,0,0.2972000042597453
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,float16,0,0.35928531487782794
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,float16,0,0.038218667109807335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.027050666511058807
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,float16,0,0.17437867323557535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,fp8,0,0.30323199431101483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,float16,0,0.3248213330904643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,fp8,0,0.3112586736679077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,0.1584106683731079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,float16,0,0.1702666680018107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,fp8,0,0.156741331020991
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,fp8,0,0.026975999275843304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,float16,0,0.17063466707865396
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,float16,0,0.17406400044759116
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,fp8,0,0.15703999996185303
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,float16,0,0.027109332382678986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,fp8,0,0.1573919951915741
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,float16,0,0.17442667484283447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,float16,0,0.10141866405804952
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,0.08849066495895386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,float16,0,0.09768533706665039
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,fp8,0,0.08686932921409607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,float16,0,0.3575146595637004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,float16,0,0.1027893324693044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,0.1666933298110962
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,fp8,0,0.08749333024024963
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,float16,0,0.09514133135477702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,fp8,0,0.09171199798583984
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.053690666953722634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,float16,0,0.0582239975531896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,float16,0,0.09724266330401103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,fp8,0,0.05397866666316986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,float16,0,0.055888002117474876
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,fp8,0,0.05082666873931885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,float16,0,0.056330665946006775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,0.09290132919947307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,float16,0,0.05797866483529409
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,float16,0,0.03903999924659729
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,float16,0,0.03563733398914337
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.05128000179926554
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,float16,0,0.037658666570981346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,fp8,0,0.034832000732421875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,float16,0,0.03542399903138479
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,fp8,0,0.03312533348798752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,float16,0,0.03580799947182337
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.035232000052928925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,float16,0,0.023024000227451324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.021290667355060577
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,float16,0,0.023120000958442688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,fp8,0,0.021482666333516438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,float16,0,0.022815999885400135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,float16,0,0.023141334454218548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,fp8,0,0.02143466720978419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,float16,0,0.02317333221435547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.021216000119845074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,float16,0,0.021221332252025604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,float16,0,0.02145066608985265
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,fp8,0,0.02123733361562093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,float16,0,0.021151999632517498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,fp8,0,0.0210506667693456
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,fp8,0,0.02054399996995926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,float16,0,0.021317332983016968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.021231998999913532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,float16,0,0.05795733133951823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,float16,0,0.021183999876181286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.019904000063737232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,float16,0,0.02216000109910965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,fp8,0,0.019482667247454327
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,float16,0,0.021018666525681812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,fp8,0,0.02092266579469045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,float16,0,0.021114667256673176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,fp8,0,0.01937066639463107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,float16,0,0.020960000654061634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.019205333044131596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.034559999903043113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.021269333859284718
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,float16,0,0.023311999936898548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,fp8,0,0.22643733024597168
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,float16,0,0.23919999599456787
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,fp8,0,0.22205867369969687
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,fp8,0,0.05230399966239929
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,fp8,0,0.033301333586374916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,0.1174720029036204
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,float16,0,0.12982933719952902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,float16,0,0.24175999561945596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,fp8,0,0.12227200468381245
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,float16,0,0.12961600224177042
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,fp8,0,0.22639467318852743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,float16,0,0.1275146702925364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,float16,0,0.1359999974568685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,float16,0,0.23824532826741537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.06638399759928386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,fp8,0,0.1174773375193278
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,float16,0,0.1309706668059031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,0.1204800009727478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,fp8,0,0.11758933464686076
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,float16,0,0.07173333565394084
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,fp8,0,0.06538666784763336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,float16,0,0.075914666056633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,float16,0,0.04372799893220266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.03930133332808813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,fp8,0,0.0680320014556249
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.06619200110435486
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,float16,0,0.07267199953397115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,float16,0,0.07259200016657512
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,float16,0,0.04151466737190882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,fp8,0,0.039434666434923805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,float16,0,0.0724533349275589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,fp8,0,0.06838933130105336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,fp8,0,0.0376800000667572
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,fp8,0,0.03751999884843826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,float16,0,0.0440533310174942
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,float16,0,0.04372266431649526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.03789333254098892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,float16,0,0.027136000494162243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.02535466601451238
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,fp8,0,0.025120000044504803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,float16,0,0.027386667827765148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,float16,0,0.027024000883102417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.025258667767047882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,fp8,0,0.024288001159826916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,float16,0,0.027322667340437572
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,float16,0,0.017114666601022083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,float16,0,0.017136000096797943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,fp8,0,0.01534933348496755
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,float16,0,0.01709866647919019
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,float16,0,0.017162666966517765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,fp8,0,0.01699200024207433
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,fp8,0,0.016677333662907284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,float16,0,0.016906666258970898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.015247999380032221
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.014949332922697067
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,float16,0,0.017125333348910015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,float16,0,0.01670933390657107
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,fp8,0,0.014885333677132925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,fp8,0,0.015109332899252573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,float16,0,0.015157333264748255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,float16,0,0.016437333077192307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,float16,0,0.04233066737651825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.014794666320085526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,float16,0,0.015141333142916361
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,float16,0,0.015125333021084467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,float16,0,0.015141333142916361
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,fp8,0,0.014954666296641031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,float16,0,0.015360000232855478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,fp8,0,0.014842666685581207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,float16,0,0.015285332997639975
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,float16,0,0.01516266663869222
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,float16,0,0.02827200045188268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,float16,0,0.015018666783968607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,fp8,0,0.014773332824309668
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,float16,0,0.015397333850463232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,float16,0,0.016208000481128693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,float16,0,0.015034666905800501
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,float16,0,0.10543466607729594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,fp8,0,0.09506666660308838
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,float16,0,0.10309867064158122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,fp8,0,0.09292800227801006
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,float16,0,0.014970666418472925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,fp8,0,0.0252960001428922
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,fp8,0,0.0949173370997111
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,float16,0,0.05797333518664042
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,fp8,0,0.014794666320085526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.05194133520126343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,float16,0,0.058378666639328
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,fp8,0,0.05240533252557119
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,float16,0,0.05669333537419637
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,float16,0,0.05681600173314413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,float16,0,0.06085866689682007
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,fp8,0,0.05162133276462555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,fp8,0,0.05193066596984863
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.03169066707293192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,float16,0,0.03974399964014689
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.052245333790779114
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,float16,0,0.03542399903138479
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,float16,0,0.03583999971548716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,fp8,0,0.03156800071398417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,fp8,0,0.03176533430814743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,fp8,0,0.03199466566244761
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,float16,0,0.03676799933115641
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.018906666586796444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,float16,0,0.021114667256673176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,float16,0,0.02089600016673406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.03143999973932902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,fp8,0,0.019306667149066925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,fp8,0,0.01907733331123988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,float16,0,0.021338666478792827
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,fp8,0,0.0191040001809597
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,float16,0,0.021189334491888683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,float16,0,0.02093333254257838
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,float16,0,0.10531733433405559
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,float16,0,0.012965332716703415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.013130666067202887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,float16,0,0.013056000073750814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,float16,0,0.013343999783198038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,float16,0,0.01479999969402949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,float16,0,0.013088000317414602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.0129120002190272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,float16,0,0.012890666723251343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,float16,0,0.01302933320403099
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,float16,0,0.012752000242471695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,float16,0,0.013056000073750814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,float16,0,0.012757333616415659
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,float16,0,0.012896000097195307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,float16,0,0.012741333494583765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,float16,0,0.012874666601419449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,float16,0,0.013983999689420065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,float16,0,0.012917333592971167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,float16,0,0.012986666212479273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,float16,0,0.012736000120639801
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,float16,0,0.012869333227475485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,fp8,0,0.012757333616415659
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,float16,0,0.013034666577974955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,float16,0,0.012805332740147909
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,float16,0,0.012837332983811697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.012682666381200155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,float16,0,0.01293333371480306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,float16,0,0.012821332861979803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,fp8,0,0.01209066684047381
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,float16,0,0.012954667210578918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.012671999633312225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,float16,0,0.03526933242877325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,float16,0,0.06658133367697398
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,float16,0,0.0680159976085027
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,fp8,0,0.06233066817124685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,fp8,0,0.06388799846172333
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,float16,0,0.06731200218200684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,fp8,0,0.012725333372751871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,fp8,0,0.06211733321348826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.03417066733042399
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,float16,0,0.037685332198937736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,fp8,0,0.03461866577466329
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,float16,0,0.037658666570981346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,float16,0,0.012949333836634954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,fp8,0,0.034154665966828666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,float16,0,0.03788800040880839
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,fp8,0,0.035274667044480644
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,float16,0,0.0415786678592364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.035216001172860466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,fp8,0,0.023168000082174938
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,float16,0,0.02309866746266683
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,fp8,0,0.023120000958442688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,float16,0,0.023205332458019257
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,fp8,0,0.02221333235502243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,float16,0,0.023200000325838726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.021488000949223835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,float16,0,0.015109332899252573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,float16,0,0.014991999914248785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,fp8,0,0.014890667051076889
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,float16,0,0.014933332800865173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,float16,0,0.03928533444801966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,float16,0,0.01515199989080429
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.01462399959564209
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,float16,0,0.010741333166758219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,fp8,0,0.010656000425418219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,float16,0,0.011034666250149408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,float16,0,0.01080000028014183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,fp8,0,0.010602666685978571
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,float16,0,0.011039999624093374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,float16,0,0.010703999549150467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,float16,0,0.023210667073726654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,float16,0,0.010661333799362183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,float16,0,0.023354666928450268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,float16,0,0.010778666784365972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,fp8,0,0.010826667149861654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,fp8,0,0.010911999891201654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,float16,0,0.010485333700974783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,float16,0,0.009818666925032934
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,float16,0,0.010890666395425797
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,float16,0,0.00983466642598311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,fp8,0,0.010869332899649939
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,float16,0,0.00956266683836778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,float16,0,0.010970667004585266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,float16,0,0.009301333377758661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.009253333633144697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,fp8,0,0.00916800027092298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,float16,0,0.01091733326514562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,fp8,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,float16,0,0.009434666484594345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,fp8,0,0.009082666908701261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,float16,0,0.010938666760921478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.010816000401973724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.008853333070874214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,float16,0,0.009077333534757296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,fp8,0,0.009786666681369146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,float16,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,fp8,0,0.009119999905427298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,float16,0,0.009173333023985228
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,fp8,0,0.009306666751702627
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.00916800027092298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,float16,0,0.009039999917149544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.023333333432674408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,float16,0,0.010496000448862711
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,float16,0,0.05931200087070465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,fp8,0,0.054048001766204834
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,fp8,0,0.05385066568851471
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,float16,0,0.05788800120353699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,float16,0,0.015210667004187902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,float16,0,0.03172266731659571
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.02962133288383484
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,float16,0,0.0332640012105306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,fp8,0,0.05403199791908264
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,fp8,0,0.031343999008337654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,float16,0,0.033546666304270424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,fp8,0,0.03148799886306127
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,float16,0,0.0322026660044988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,fp8,0,0.02959466725587845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,float16,0,0.020954666038354237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,float16,0,0.03230399886767069
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.029653333127498627
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,float16,0,0.021162666380405426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,fp8,0,0.020207999895016353
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,fp8,0,0.019359999646743137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,fp8,0,0.019189332922299702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,float16,0,0.021055998901526134
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.019002666076024372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,float16,0,0.013712000101804733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,float16,0,0.021087999145189922
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,float16,0,0.01333333303531011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,float16,0,0.014890667051076889
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,fp8,0,0.012741333494583765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,float16,0,0.012970666090647379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.010741333166758219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,float16,0,0.057376002271970115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,float16,0,0.010794666906197866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,fp8,0,0.010863999525705973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,float16,0,0.010629333555698395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,fp8,0,0.009557333464423815
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,float16,0,0.010666667173306147
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,float16,0,0.01055466632048289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.00914666677514712
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.009098666409651438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,float16,0,0.009066666786869368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,float16,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,float16,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,float16,0,0.008725333337982496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,float16,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.008656000097592672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,float16,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,float16,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,float16,0,0.020928000410397846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.008805333326260248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.012784000486135483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,float16,0,0.008805333326260248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,float16,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,fp8,0,0.008725333337982496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,float16,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,fp8,0,0.00867733359336853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,float16,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,float16,0,0.008736000085870424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,float16,0,0.01091733326514562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,float16,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,fp8,0,0.008805333326260248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,float16,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,fp8,0,0.008805333326260248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,float16,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,float16,0,0.05495466788609823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,float16,0,0.008810666700204214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,fp8,0,0.049216002225875854
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,float16,0,0.05392000079154968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,float16,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,float16,0,0.03147733211517334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,float16,0,0.05197866757710775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,fp8,0,0.04806933303674062
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,float16,0,0.014368000129858652
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.027466667195161183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,float16,0,0.03158933420976003
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,fp8,0,0.027215999861558277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,float16,0,0.03139200061559677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,fp8,0,0.029066666960716248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,float16,0,0.03146133323510488
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,fp8,0,0.02717333287000656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.027136000494162243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,float16,0,0.019082666685183842
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,float16,0,0.03128000100453695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,float16,0,0.020874666670958202
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,fp8,0,0.01903466631968816
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,float16,0,0.019466667125622433
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,fp8,0,0.019109333554903667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,float16,0,0.019167999426523846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,fp8,0,0.0189280000825723
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,float16,0,0.019152000546455383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,float16,0,0.01292266696691513
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,float16,0,0.013088000317414602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,float16,0,0.013050666699806849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,float16,0,0.013253333667914072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,float16,0,0.012885333349307379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,fp8,0,0.049973333875338234
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,float16,0,0.010949333508809408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,float16,0,0.010565333068370819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,float16,0,0.010666667173306147
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,float16,0,0.010853332777818045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,float16,0,0.01071999967098236
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,float16,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,float16,0,0.008832000195980072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,float16,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,fp8,0,0.008789333204428354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.019093333433071773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,float16,0,0.009077333534757296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,float16,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,float16,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,float16,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.01841066653529803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,float16,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.008613333106040955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,float16,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,fp8,0,0.008837333569924036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,float16,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.008821333448092142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,float16,0,0.00879466657837232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,float16,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,fp8,0,0.008853333070874214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,float16,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,fp8,0,0.008693333094318708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,float16,0,0.008698666468262672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,float16,0,0.0540533314148585
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,fp8,0,0.04791999856630961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,float16,0,0.05169066786766052
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,float16,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,fp8,0,0.04782933493455251
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,float16,0,0.05337599913279215
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,float16,0,0.02937600016593933
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,fp8,0,0.047839999198913574
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,float16,0,0.008725333337982496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.027349332968393963
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,float16,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,fp8,0,0.027306665976842243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,float16,0,0.029674666623274486
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,fp8,0,0.027621333797772724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,float16,0,0.031167998909950256
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,fp8,0,0.027130665878454845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,float16,0,0.030058667063713074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,float16,0,0.019189332922299702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.02734400083621343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,float16,0,0.019215999792019527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,float16,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,fp8,0,0.018917333334684372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,float16,0,0.020266667008399963
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,float16,0,0.01893866683046023
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,fp8,0,0.017103999853134155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,float16,0,0.018965333700180054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.018906666586796444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,float16,0,0.012938667088747025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,float16,0,0.013013333082199097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,float16,0,0.012991999586423239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,float16,0,0.012730666746695837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,float16,0,0.013183999806642532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.009061333412925402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,float16,0,0.010687999427318573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,float16,0,0.010645333677530289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,float16,0,0.03147733211517334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,float16,0,0.010954666882753372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,float16,0,0.008687999720374743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,float16,0,0.010714666297038397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.008837333569924036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,float16,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,float16,0,0.009077333534757296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,fp8,0,0.008767999708652496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,float16,0,0.008810666700204214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,float16,0,0.008618666479984919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,float16,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.019018666197856266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,fp8,0,0.008736000085870424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,fp8,0,0.017423999806245167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,float16,0,0.009039999917149544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,fp8,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,float16,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,float16,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,float16,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,float16,0,0.00873066671192646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,float16,0,0.00879466657837232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,float16,0,0.009066666786869368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,float16,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,float16,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,float16,0,0.010640000303586325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,float16,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,float16,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.00860799973209699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,0,0.01181866725285848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,float16,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,0,0.016901332885026932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,0,0.014917333920796713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,0,0.01700266698996226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.01482133318980535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,0,0.008000000069538752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,0,0.007029333462317784
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,0,0.00871999996403853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,0,0.011034666250149408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,0,0.01173866664369901
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,0,0.006837333242098491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,0,0.017082666357358296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.01071999967098236
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,0,0.006927999978264173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,0,0.00847999999920527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,0,0.008618666479984919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,0,0.012645332763592402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.01055466632048289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,0,0.006917333230376244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,0,0.00690133310854435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,0,0.006864000111818314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,0,0.006602666651209195
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,0,0.008320000022649765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,0,0.00690133310854435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,0,0.008821333448092142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,0,0.008320000022649765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,0,0.007018666714429855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.006890666360656421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,0,0.006762666627764702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,0,0.007050666958093643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,0,0.006895999734600385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,0,0.006831999868154526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,0,0.006805333619316419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.0068693334857622785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,0,0.006735999758044879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,0,0.0069759997228781385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,0,0.0068853336075941724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,0,0.006538666784763336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,0,0.006831999868154526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,0,0.006864000111818314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,0,0.007040000210205714
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,0,0.006810666372378667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,0,0.007055999711155891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.014767999450365702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,0,0.006671999891599019
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,0,0.0069973332186539965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,0,0.00702400008837382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,0,0.006751999879876773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,0,0.00684799998998642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,0,0.006682666639486949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,0,0.006704000135262807
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.00879466657837232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,0,0.006890666360656421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,0,0.007120000198483467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,0,0.006650666395823161
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,0,0.006895999734600385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,0,0.006911999856432279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,0,0.0069759997228781385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,0,0.00684799998998642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.008714666590094566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.0069333333522081375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,0,0.006858666737874349
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,0,0.00702400008837382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,0,0.0069386667261521024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,0,0.006906666482488315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.006640000268816948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,0,0.006911999856432279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,0,0.006618666773041089
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,0,0.007034666836261749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,0,0.006800000245372455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.006634666894872983
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,float16,0,1.470901330312093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,float16,0,0.8022666772206625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,0.7341492970784506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,float16,0,1.4734613100687664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,fp8,0,1.3498613039652507
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.006735999758044879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,float16,0,0.9404693444569906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,fp8,0,0.7811253070831299
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,float16,0,0.8503572940826416
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,fp8,0,0.7348852952321371
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,0.7725013097127279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,float16,0,0.47337599595387775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,float16,0,0.5262720187505087
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.006741333131988843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,fp8,0,0.45156268278757733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,float16,0,0.49854934215545654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,fp8,0,0.44515732924143475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,float16,0,0.4930560191472371
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,0.4467466672261556
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,float16,0,0.29730133215586346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,0.2882773280143738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,float16,0,0.3214026689529419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,fp8,0,0.2836693326632182
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,fp8,0,0.2709386746088664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,float16,0,0.3186826705932617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,float16,0,0.3220799962679545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,0.29004265864690143
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,fp8,0,0.7964853445688883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,float16,0,0.9601866404215494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,float16,0,0.9151733716328939
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,fp8,0,0.8431999683380127
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,float16,0,0.4861493508021037
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,0.4733813206354777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,float16,0,0.5551573435465494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,fp8,0,0.4713386694590251
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,fp8,0,1.4181599617004395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,float16,0,0.4849280118942261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,fp8,0,0.4716000159581502
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,float16,0,0.5377386808395386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,0.47232532501220703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,float16,0,0.3102666735649109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,float16,0,0.2922453284263611
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,fp8,0,0.28174932797749835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,float16,0,0.31089599927266437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,fp8,0,0.28119466702143353
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,float16,0,0.8516373634338379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,0.2823306719462077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,0.4252053499221802
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,0.1853813330332438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,fp8,0,0.186298668384552
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,float16,0,0.2100320061047872
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,fp8,0,0.18607999881108603
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,float16,0,0.2032159964243571
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,0.18447466691335043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,float16,0,0.6665813525517782
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,fp8,0,0.6128213405609131
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,float16,0,0.6669867038726807
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,float16,0,0.39481600125630695
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,0.3396586577097575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,float16,0,0.3803413311640422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,fp8,0,0.3264533281326294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,float16,0,0.3861493269602458
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,fp8,0,0.3499946594238281
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,float16,0,0.3895253340403239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,0.2816426753997803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,0.34860801696777344
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,float16,0,0.2403200070063273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,0.21783999601999918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,float16,0,0.24788800875345865
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,float16,0,0.29212266206741333
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,fp8,0,0.2190720041592916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,float16,0,0.20499199628829956
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,fp8,0,0.21967466672261557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,float16,0,0.20309333006540933
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,float16,0,0.22229333718617758
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,0.21799467007319132
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,float16,0,0.1477013329664866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,0.13590932885805765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,float16,0,0.14985600113868713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,fp8,0,0.1358560025691986
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,float16,0,0.15042666594187418
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,fp8,0,0.13577600320180258
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,float16,0,0.14757866660753885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,float16,0,0.9094879627227783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,fp8,0,0.7469866275787354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,float16,0,0.45611735184987384
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,float16,0,0.8308853308359782
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,fp8,0,0.763375997543335
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,fp8,0,0.6132266521453857
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,float16,0,0.4300533135732015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,0.4184639851252238
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,fp8,0,0.4174400170644124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,float16,0,0.5013226668039957
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,fp8,0,0.3905493418375651
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,float16,0,0.45815467834472656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,float16,0,0.2506879965464274
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,float16,0,0.2714293400446574
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,float16,0,0.26798399289449054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,fp8,0,0.24657066663106283
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,float16,0,0.24995734294255575
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,0.24358399709065756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,float16,0,0.1602133313814799
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,0.14592533310254416
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,float16,0,0.176581343015035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,float16,0,0.1590986649195353
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,fp8,0,0.15798933307329813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,float16,0,0.16010666886965433
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,float16,0,0.23935999472935995
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,0.14666133125623068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,float16,0,0.10294933120409648
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,0.10103999574979146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,float16,0,0.11306666334470113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,fp8,0,0.09480533003807068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,0.13586666186650595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,float16,0,0.1111893355846405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,float16,0,0.10307199756304423
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,0.1011946698029836
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,float16,0,0.46705599625905353
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,fp8,0,0.428330659866333
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,float16,0,0.49566932519276935
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,fp8,0,0.43001067638397217
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,float16,0,0.2634506622950236
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,0.2581226627031962
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,0.24379199743270874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,fp8,0,0.24173865715662637
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,float16,0,0.3016960024833679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,fp8,0,0.2382133404413859
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,float16,0,0.2876746654510498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,fp8,0,0.2579946716626485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,float16,0,0.2847786744435628
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,0.240447998046875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,float16,0,0.17372800906499228
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,0.15465066830317178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,float16,0,0.17111466328303018
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,fp8,0,0.14341866970062256
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,float16,0,0.17190400759379068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,fp8,0,0.15498133500417074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,fp8,0,0.15781866510709128
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,float16,0,0.1721280018488566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,0.14403200149536133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,float16,0,0.11245333154996236
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,0.10309333602587382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,float16,0,0.1072213351726532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,fp8,0,0.1020853320757548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,float16,0,0.10309867064158122
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,fp8,0,0.10090667009353638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,float16,0,0.11414399743080139
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,0.10115733742713928
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,float16,0,0.084389328956604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.07262933254241943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,float16,0,0.08377599716186523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,fp8,0,0.07860266665617625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,float16,0,0.08478933572769165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,fp8,0,0.07874133189519246
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.07829333345095317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,float16,0,0.07860800127188365
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,fp8,0,0.10179199775060017
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,float16,0,0.4916906754175822
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,float16,0,0.2389706571896871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,0.2330346703529358
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,fp8,0,0.4254666566848755
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,float16,0,0.26793599128723145
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,fp8,0,0.23280000686645508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,fp8,0,0.23730132977167764
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,0.4127093156178792
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,float16,0,0.14166399836540222
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,float16,0,0.25919467210769653
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,0.2343626618385315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,0.1362879971663157
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,fp8,0,0.134442667166392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,fp8,0,0.12570666273434958
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,0.13382400075594583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,float16,0,0.0906880001227061
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.08197866876920064
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,float16,0,0.0904266635576884
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,fp8,0,0.08270933230717976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,float16,0,0.09079999725023906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,fp8,0,0.08264000217119853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,float16,0,0.09086400270462036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.08250666658083598
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,float16,0,0.056405335664749146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.055999999245007835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,float16,0,0.05667200187842051
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,fp8,0,0.05667733152707418
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,float16,0,0.056015998125076294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,fp8,0,0.41919998327891034
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,float16,0,0.5105599959691366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,fp8,0,0.05741333464781443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,float16,0,0.056645333766937256
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.05604266623655955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.05573866764704386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,float16,0,0.0602400004863739
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,float16,0,0.055999999245007835
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,fp8,0,0.05609600245952606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,float16,0,0.0602453351020813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,fp8,0,0.05599466462930044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,float16,0,0.05589333176612854
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.0558186670144399
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,float16,0,0.27854933341344196
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,fp8,0,0.261354664961497
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,float16,0,0.2673226594924927
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,float16,0,0.15058133006095886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,float16,0,0.15681599577267966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,float16,0,0.15122666954994202
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,float16,0,0.15337066849072775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,0.15061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,float16,0,0.16908800601959229
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,fp8,0,0.13822399576505026
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,float16,0,0.15217066804567972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,0.148799995581309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,float16,0,0.1027946670850118
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,0.08287466565767924
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,float16,0,0.10116799672444661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,fp8,0,0.09054932991663615
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,float16,0,0.10073066751162212
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,fp8,0,0.09037866195042928
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,float16,0,0.0682239979505539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,0.08437333504358928
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.06010133524735769
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,float16,0,0.0673280010620753
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,fp8,0,0.05967999994754791
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,float16,0,0.06631466746330261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,fp8,0,0.06021333237489065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.05938666562239329
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,float16,0,0.2688106695810954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,float16,0,0.04990399877230326
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,float16,0,0.049973333875338234
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,fp8,0,0.04387199878692627
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,float16,0,0.04961066444714864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,fp8,0,0.26180267333984375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,float16,0,0.15130133430163065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,fp8,0,0.04404266675313314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,float16,0,0.04783466458320618
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,float16,0,0.04782933493455251
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,fp8,0,0.14894400040308634
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.04353600243727366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,float16,0,0.04784533381462097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,fp8,0,0.04342933495839437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,float16,0,0.047450666626294456
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,fp8,0,0.043605332573254905
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,float16,0,0.04376000165939331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.043738668163617454
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,float16,0,0.2945280075073242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,float16,0,0.0932426651318868
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,fp8,0,0.2516319950421651
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,float16,0,0.2720800042152405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,float16,0,0.14411733547846475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,fp8,0,0.25150400400161743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,float16,0,0.06597866614659627
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,0.14084800084431967
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,float16,0,0.15331733226776123
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.043706665436426796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,fp8,0,0.1402186652024587
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,float16,0,0.14751999576886496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,fp8,0,0.14033599694569907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,float16,0,0.14511467019716898
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,float16,0,0.0867039958635966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,0.1402720014254252
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,0.08224000036716461
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,float16,0,0.09075199564297994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,fp8,0,0.07717866698900859
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,float16,0,0.09079466263453166
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,float16,0,0.09175999959309895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,0.07634133100509644
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,float16,0,0.05806399881839752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.04979733129342397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,float16,0,0.05975466469923655
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,fp8,0,0.052527998884518944
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,float16,0,0.05563200016816457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,fp8,0,0.052330667773882546
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,float16,0,0.058058664202690125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.05332799752553304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,float16,0,0.033930666744709015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.03340800106525421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,float16,0,0.035455999275048576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,float16,0,0.03732266773780187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,float16,0,0.036373332142829895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.0334346666932106
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,float16,0,0.03522133330504099
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,float16,0,0.035274667044480644
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,fp8,0,0.03271999955177307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,float16,0,0.03558400024970373
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,fp8,0,0.03143999973932902
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,float16,0,0.03555200000603994
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.03133333226044973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,float16,0,0.03403733422358831
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.03164800008138021
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,float16,0,0.03425066669782003
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,fp8,0,0.030495998760064442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,float16,0,0.03374933451414108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,fp8,0,0.03155199935038885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,float16,0,0.033557333052158356
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.030389333764712017
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,float16,0,0.17856534322102866
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,fp8,0,0.16140266259511313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,float16,0,0.17803200085957846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,fp8,0,0.1534346640110016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,float16,0,0.09753066301345825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,0.09357866644859314
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,fp8,0,0.0819413314263026
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.04378133515516917
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,float16,0,0.09520000219345093
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,float16,0,0.10442133744557698
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,float16,0,0.09723732868830363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,0.09307199716567993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,float16,0,0.05972800155480703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,float16,0,0.06273066500822704
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,fp8,0,0.031514666974544525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,fp8,0,0.055770665407180786
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,float16,0,0.061205332477887474
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.033189333975315094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,float16,0,0.05938133100668589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.051776001850763954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.03734400123357773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,float16,0,0.041706666350364685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,float16,0,0.041536000867684685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,fp8,0,0.0373279998699824
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,float16,0,0.04162133236726125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,fp8,0,0.03722133239110311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.03535466641187668
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.027232001225153606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,float16,0,0.02940800040960312
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,float16,0,0.02743999908367793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,fp8,0,0.0273333340883255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,float16,0,0.028010666370391846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,fp8,0,0.027274665733178455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,float16,0,0.029333333174387615
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.027050666511058807
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,float16,0,0.028917332490285236
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.027082666754722595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,float16,0,0.02914133419593175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,fp8,0,0.027029333015282948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,float16,0,0.028650666276613872
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,fp8,0,0.02714666724205017
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,float16,0,0.028789333999156952
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.02700799951950709
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,fp8,0,0.08681600292523702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,float16,0,0.027450665831565857
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.02628266563018163
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,fp8,0,0.026682667434215546
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.05638400216897329
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,float16,0,0.027248000105222065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,fp8,0,0.02589333305756251
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,float16,0,0.028373333315054577
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.02717333287000656
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,fp8,0,0.05596266686916351
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,float16,0,0.04154133299986521
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,float16,0,0.18202133973439535
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,fp8,0,0.16586666305859885
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,0.09110400080680847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,fp8,0,0.15834133823712668
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,float16,0,0.09664000074068706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,float16,0,0.18374399344126383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,float16,0,0.09763200084368388
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,fp8,0,0.09194133679072063
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,fp8,0,0.08541867136955261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.0539626677831014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,fp8,0,0.08672533432642619
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,float16,0,0.027690666417280834
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,float16,0,0.09849066535631816
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,float16,0,0.054341331124305725
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,float16,0,0.06020799775918325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,fp8,0,0.05324266850948334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,float16,0,0.06053866446018219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,fp8,0,0.04990399877230326
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.034058667719364166
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.05057600140571594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,float16,0,0.03756266583998998
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,fp8,0,0.03298133363326391
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,float16,0,0.038047999143600464
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,fp8,0,0.033029332756996155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,float16,0,0.03843733419974645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,float16,0,0.022842665513356526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.021365332106749218
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,float16,0,0.023370665808518726
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,fp8,0,0.02102400114138921
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,float16,0,0.02293333411216736
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,float16,0,0.022991999983787537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.021226666867733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,float16,0,0.021114667256673176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.021029333273569744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,float16,0,0.021253332495689392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,fp8,0,0.02125866711139679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,float16,0,0.020848001043001812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,fp8,0,0.020975999534130096
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,float16,0,0.02094399929046631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,float16,0,0.02102400114138921
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,0.09246399998664856
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.021130666136741638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,float16,0,0.021253332495689392
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,fp8,0,0.02024000013868014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,fp8,0,0.02025066688656807
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,float16,0,0.0580320010582606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,float16,0,0.03765333443880081
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,float16,0,0.02222399910291036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.020928000410397846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.021114667256673176
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,float16,0,0.021850667893886566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,fp8,0,0.02054399996995926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,float16,0,0.021157334248224895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.034272000193595886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,float16,0,0.02111999938885371
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.01907733331123988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,float16,0,0.021317332983016968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,fp8,0,0.021375998854637146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,float16,0,0.13310933113098145
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,fp8,0,0.11755733688672383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,float16,0,0.07237333556016286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,float16,0,0.09690666198730469
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,float16,0,0.02089600016673406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,float16,0,0.12797866264979044
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,fp8,0,0.11711999773979187
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.06549333532651265
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,float16,0,0.0718560020128886
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,fp8,0,0.06525333225727081
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,fp8,0,0.06451199948787689
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,float16,0,0.0766293356815974
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,float16,0,0.045696000258127846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.06819200019041698
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,fp8,0,0.021168000996112823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.037503999968369804
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,float16,0,0.04159999887148539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.023770667612552643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,float16,0,0.04423999786376953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,fp8,0,0.03921066721280416
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,float16,0,0.0271573339899381
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,float16,0,0.04502933224042257
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,float16,0,0.026352000733216602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,float16,0,0.027349332968393963
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,fp8,0,0.024549332757790882
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,fp8,0,0.025301332275072735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,float16,0,0.025594666600227356
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,float16,0,0.01704000060757001
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.023610666394233704
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.01720533271630605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,float16,0,0.017173333714405697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,float16,0,0.01718933383623759
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,float16,0,0.016938666502634685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,float16,0,0.017231999586025875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,float16,0,0.015557333827018738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,float16,0,0.01710933322707812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,fp8,0,0.014831999937693277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,float16,0,0.015087999403476715
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,float16,0,0.01515199989080429
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.014975999792416891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,float16,0,0.015029333531856537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,fp8,0,0.014805333067973455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,float16,0,0.014975999792416891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,float16,0,0.015119999647140503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,float16,0,0.0758186678091685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,float16,0,0.015082667271296183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.014725333700577417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,float16,0,0.015311999867359797
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,float16,0,0.015066667149464289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,float16,0,0.01516266663869222
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.03833066672086716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,float16,0,0.015119999647140503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.014933332800865173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,float16,0,0.015098666151364645
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,float16,0,0.015034666905800501
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,fp8,0,0.0161920003592968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,float16,0,0.014991999914248785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.014981333166360855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,float16,0,0.0580266664425532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,fp8,0,0.05087999999523163
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,fp8,0,0.05186133086681366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,float16,0,0.05685866872469584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,float16,0,0.03722133239110311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.03136533250411352
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,float16,0,0.033786666889985405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,fp8,0,0.031541332602500916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,float16,0,0.035717333356539406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,fp8,0,0.03146666785081228
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,float16,0,0.037530665596326195
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,fp8,0,0.03945599993069967
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,float16,0,0.02107733239730199
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.03141866624355316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,float16,0,0.021029333273569744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.019226666539907455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,fp8,0,0.019178666174411774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,float16,0,0.02141333371400833
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,fp8,0,0.019066666563351948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,float16,0,0.014335999886194864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,float16,0,0.014117332796255747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,float16,0,0.021151999632517498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,float16,0,0.014757333944241205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,fp8,0,0.013130666067202887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,float16,0,0.01479999969402949
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.013183999806642532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.01268799975514412
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,fp8,0,0.012618667135636011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,float16,0,0.013066666821638743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,float16,0,0.012997332960367203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,fp8,0,0.012703999876976013
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,float16,0,0.012794667234023413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,float16,0,0.012997332960367203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,float16,0,0.012773333738247553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,float16,0,0.012784000486135483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,float16,0,0.012757333616415659
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.01268799975514412
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,float16,0,0.012837332983811697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,float16,0,0.012661332885424295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,fp8,0,0.012757333616415659
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,float16,0,0.012837332983811697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.012757333616415659
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,float16,0,0.012725333372751871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.012416000167528788
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,float16,0,0.012762666990359625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,fp8,0,0.011871999750534693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,float16,0,0.012762666990359625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,float16,0,0.012682666381200155
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,float16,0,0.012650666137536367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,float16,0,0.013130666067202887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,float16,0,0.012703999876976013
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,float16,0,0.012730666746695837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.011685332904259363
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,float16,0,0.01301866645614306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,float16,0,0.012965332716703415
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,float16,0,0.037578667203585304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,fp8,0,0.03422400106986364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,float16,0,0.03903999924659729
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,float16,0,0.02317333221435547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.02242133269707362
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,float16,0,0.023333333432674408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,fp8,0,0.022976001103719074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,fp8,0,0.02256533255179723
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,float16,0,0.02369600037733714
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,float16,0,0.014741333822409311
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,float16,0,0.015103999525308609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,float16,0,0.02314666658639908
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.021695998807748158
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,float16,0,0.015061333775520325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,fp8,0,0.014815999815861383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,float16,0,0.010757333288590113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,float16,0,0.015050667027632395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.012719999998807907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.010757333288590113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,fp8,0,0.010853332777818045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,float16,0,0.010954666882753372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.01073066641887029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,float16,0,0.010682666053374609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.010634666929642359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,float16,0,0.011018666128317514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,float16,0,0.010965333630641302
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,float16,0,0.01102399950226148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.010442666709423065
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,float16,0,0.010565333068370819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.00915733352303505
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,float16,0,0.010944000134865442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,float16,0,0.010832000523805618
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,fp8,0,0.03450666616360346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,float16,0,0.010650667051474253
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.010314666976531347
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,float16,0,0.010762666662534079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,float16,0,0.010944000134865442
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,float16,0,0.010714666297038397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,float16,0,0.010410666465759277
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.009152000149091085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.009573333586255709
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,float16,0,0.010885333021481832
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,float16,0,0.009872000043590864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,fp8,0,0.009098666409651438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,float16,0,0.0107893335322539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,fp8,0,0.009152000149091085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.014645333091417948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,float16,0,0.009765333185593287
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.009695999945203463
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,float16,0,0.010698666175206503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,float16,0,0.008762666955590248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,float16,0,0.01099733387430509
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,float16,0,0.010672000547250112
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,float16,0,0.03331733246644338
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,fp8,0,0.03018666555484136
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,float16,0,0.03325333446264267
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,fp8,0,0.029482667644818623
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,float16,0,0.020879998803138733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.01911466692884763
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,float16,0,0.021338666478792827
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,float16,0,0.021061333517233532
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,float16,0,0.021087999145189922
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,float16,0,0.013455999394257864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.019173332800467808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,float16,0,0.01097600037852923
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,float16,0,0.014826666563749313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,float16,0,0.014826666563749313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,float16,0,0.015082667271296183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,float16,0,0.0107893335322539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,float16,0,0.010741333166758219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.010634666929642359
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,float16,0,0.010677333921194077
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,fp8,0,0.009658666948477427
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,float16,0,0.010784000158309937
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,fp8,0,0.010709332923094431
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,float16,0,0.010949333508809408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.009322666873534521
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,float16,0,0.010629333555698395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,fp8,0,0.008853333070874214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,float16,0,0.009114666531483332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,fp8,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,float16,0,0.010784000158309937
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,float16,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.008837333569924036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,float16,0,0.00916800027092298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,float16,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,fp8,0,0.00919999989370505
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,float16,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,float16,0,0.009109333157539368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,fp8,0,0.019141333798567455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,fp8,0,0.008832000195980072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,float16,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,float16,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,float16,0,0.009103999783595404
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,float16,0,0.009077333534757296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,float16,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,float16,0,0.009056000038981438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,fp8,0,0.008789333204428354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,float16,0,0.008837333569924036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,float16,0,0.031104000906149547
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,fp8,0,0.027493332823117573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,float16,0,0.029418667157491047
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,float16,0,0.008703999842206636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,float16,0,0.018911999960740406
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,fp8,0,0.027077332139015198
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,float16,0,0.01950399950146675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,float16,0,0.009178666397929192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,fp8,0,0.019071999937295914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,float16,0,0.01907733331123988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,fp8,0,0.019088000059127808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,float16,0,0.01312000056107839
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.019130667050679524
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,float16,0,0.01313599944114685
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,float16,0,0.013013333082199097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,fp8,0,0.012655999511480331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,float16,0,0.013162666310866674
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,float16,0,0.010970667004585266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,float16,0,0.010938666760921478
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,float16,0,0.010693332801262537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,float16,0,0.010949333508809408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.009205333267649015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,float16,0,0.008709333216150602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,float16,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.008832000195980072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,float16,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.01912533367673556
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,fp8,0,0.008767999708652496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,float16,0,0.008799999952316284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,float16,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,float16,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,float16,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,fp8,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,float16,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,float16,0,0.019215999792019527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,float16,0,0.008778666456540426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,float16,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,fp8,0,0.008767999708652496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,float16,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,float16,0,0.00877333308259646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,fp8,0,0.008837333569924036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,float16,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,float16,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.008602666358153025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,float16,0,0.00877333308259646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,float16,0,0.031130666534105938
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,fp8,0,0.027376001079877216
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,float16,0,0.019066666563351948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,fp8,0,0.027471999327341717
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.008709333216150602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,fp8,0,0.01912533367673556
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,float16,0,0.019071999937295914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,fp8,0,0.01754133279124896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,float16,0,0.012874666601419449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.01842133328318596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.012549333274364471
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,float16,0,0.013013333082199097
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.009717333440979322
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,float16,0,0.012847999731699625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,float16,0,0.010874666273593903
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,fp8,0,0.00916800027092298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,float16,0,0.010778666784365972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,fp8,0,0.009296000003814697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,float16,0,0.010757333288590113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.009114666531483332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,float16,0,0.03130666663249334
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,float16,0,0.009183999771873156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,fp8,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,float16,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,float16,0,0.019205333044131596
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,float16,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,fp8,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,float16,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,float16,0,0.01923199991385142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,float16,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.008559999987483025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,float16,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,float16,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,fp8,0,0.008837333569924036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,float16,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,fp8,0,0.00874133345981439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,float16,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.008832000195980072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,float16,0,0.008736000085870424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,float16,0,0.013151999562978745
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.008602666358153025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,fp8,0,0.008672000219424566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,float16,0,0.008693333094318708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,float16,0,0.008656000097592672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.008789333204428354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,fp8,0,0.008634666601816813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,float16,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,float16,0,0.008597333605090777
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,float16,0,0.008821333448092142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.00873066671192646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,float16,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,0,0.008618666479984919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,0,0.012362666428089142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,0,0.010714666297038397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,0,0.011007999380429586
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.010869332899649939
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,0,0.0069333333522081375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,0,0.008687999720374743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,0,0.01239466667175293
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,0,0.009056000038981438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.00877333308259646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,0,0.0069386667261521024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,0,0.006831999868154526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,0,0.008618666479984919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,0,0.007167999943097432
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,0,0.008693333094318708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.008805333326260248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.00701333334048589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,0,0.007055999711155891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,0,0.006640000268816948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,0,0.0069226666043202085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,0,0.006805333619316419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,0,0.007941333577036858
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.006837333242098491
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,0,0.00679466687142849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,0,0.006528000036875407
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,0,0.006821333120266597
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,0,0.006773333375652631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,0,0.006613333399097125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.00666133314371109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.0068853336075941724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,0,0.006890666360656421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,0,0.007151999821265538
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,0,0.0068800002336502075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,0,0.006805333619316419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.010847999403874079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,0,0.006618666773041089
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.00655466690659523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,0,0.006709333509206772
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,0,0.006725333631038666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,0,0.006800000245372455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,0,0.0068693334857622785
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,0,0.006789333497484525
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.006730666384100914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,0,0.006725333631038666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.006949333474040031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,0,0.00697066696981589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,0,0.00697066696981589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,0,0.006735999758044879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,0,0.0068853336075941724
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,0,0.006575999781489372
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.006906666482488315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,0,0.006629333520929019
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,0,0.007770666852593422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,0,0.006906666482488315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,0,0.00697066696981589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,float16,0,0.7999040285746256
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,float16,0,0.48908265431722003
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,fp8,0,0.7671466668446859
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,0.4512053330739339
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,float16,0,0.5017706553141276
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,fp8,0,0.4248693386713664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,float16,0,0.3169706662495931
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,0.4500853220621745
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,0.28549333413441974
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,float16,0,0.3211839993794759
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,fp8,0,0.2887466748555501
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,float16,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,float16,0,0.32259200016657513
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,float16,0,0.20859734217325845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,0.27194132407506305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,0.18699200948079428
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,float16,0,0.20775467157363892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,fp8,0,0.18914133310317993
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,float16,0,0.19559999306996664
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,0.18820265928904215
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,float16,0,0.5250240166982015
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.00696000022192796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,float16,0,0.306005338827769
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,fp8,0,0.44360534350077313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,0.28058133522669476
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.006842666616042455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,0,0.006917333230376244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,float16,0,0.31247466802597046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,float16,0,0.3061013420422872
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,float16,0,0.19079999128977457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,0.2820533315340678
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,0.17758933703104654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,float16,0,0.20004266500473022
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,fp8,0,0.18595200777053833
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,float16,0,0.20599466562271118
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,0.17374932765960693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,0.1400320033232371
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,float16,0,0.15651200215021768
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,fp8,0,0.13753066460291544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,float16,0,0.1588159998257955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,float16,0,0.3612533410390218
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,float16,0,0.23758399486541748
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,fp8,0,0.3410986661911011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,0.21811199188232422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,float16,0,0.23972266912460327
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,fp8,0,0.21644800901412964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,float16,0,0.24023467302322388
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,0.2181546688079834
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,0.13618133465449014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,float16,0,0.5172906716664633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,float16,0,0.14918933312098184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,fp8,0,0.13493333260218301
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,float16,0,0.14855999747912088
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,0.1358506679534912
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,float16,0,0.1341600020726522
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.12179733316103618
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,float16,0,0.1335093379020691
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,fp8,0,0.12148267030715942
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,float16,0,0.13180800278981528
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.1216373344262441
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,float16,0,0.45281068483988446
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,float16,0,0.2550719976425171
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,fp8,0,0.3895893494288127
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,0.24447466929753622
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,fp8,0,0.2632586757342021
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,float16,0,0.2810773253440857
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,0.22580265998840332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,float16,0,0.15878933668136597
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,float16,0,0.17657599846522012
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,0.1458560029665629
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,0.13612266381581625
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,float16,0,0.1718826691309611
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,fp8,0,0.15793599685033163
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,float16,0,0.16378666957219443
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,0.15729066729545593
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.10314666231473286
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,fp8,0,0.10242133339246114
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,float16,0,0.1139359970887502
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.09516800443331401
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,float16,0,0.14586666226387024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,float16,0,0.10941867033640544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,fp8,0,0.09698667128880818
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,float16,0,0.10966400305430095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.09883200128873189
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,float16,0,0.29893332719802856
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,fp8,0,0.25785599152247113
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,float16,0,0.1762239933013916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,0.1558026671409607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,float16,0,0.17485332489013672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,fp8,0,0.1527839998404185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,float16,0,0.26440000534057617
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,float16,0,0.1585813363393148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,0.15583466490109762
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,float16,0,0.11414399743080139
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,float16,0,0.11659733454386394
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,fp8,0,0.10143466790517171
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,float16,0,0.11681600411732991
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,0.10282666484514873
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,float16,0,0.08687999844551086
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,float16,0,0.11296000083287557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.0768746683994929
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,float16,0,0.08708799878756206
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,float16,0,0.10937066872914632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,fp8,0,0.07844266792138417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,float16,0,0.08514666557312012
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.07836266855398814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,float16,0,0.08349333206812541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.07787199815114339
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,float16,0,0.08258133133252461
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,fp8,0,0.0764160007238388
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,float16,0,0.08311999837557475
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.07680533329645793
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,float16,0,0.10958932836850484
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,float16,0,0.14135467012723288
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,0.13683199882507324
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,float16,0,0.24751466512680054
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,fp8,0,0.2350239952405294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,float16,0,0.1546346644560496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,fp8,0,0.1317813297112783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,float16,0,0.15205867091814676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.08930666248003642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,float16,0,0.09718400239944458
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,float16,0,0.10085866848627727
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,float16,0,0.09493333101272583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.08683733145395915
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.05605333546797434
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,float16,0,0.062037333846092224
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,fp8,0,0.22642666101455688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,fp8,0,0.05338133374849955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,float16,0,0.06376533210277557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.05602133274078369
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,0.09494933485984802
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,float16,0,0.056202664971351624
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.0537120004494985
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,float16,0,0.060047999024391174
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,fp8,0,0.05159999926884969
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,float16,0,0.05585599939028422
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.0536106675863266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,float16,0,0.059861332178115845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,fp8,0,0.05183466772238413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,float16,0,0.05992533266544342
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.05199466645717621
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,float16,0,0.16420267025629678
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,fp8,0,0.1437279979387919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.09391466776529948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,float16,0,0.0963253378868103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,0.08912000060081482
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,float16,0,0.10073066751162212
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,fp8,0,0.08343467116355896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,float16,0,0.09762133161226909
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,fp8,0,0.08945066730181377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,0.08948799967765808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,0.1392586628595988
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,float16,0,0.06699199974536896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.06010133524735769
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,float16,0,0.06250133117039998
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,fp8,0,0.056090667843818665
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,float16,0,0.06621333460013072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.06018666426340739
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,float16,0,0.04826666911443075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.0415786678592364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,float16,0,0.04827733337879181
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,fp8,0,0.04510400195916494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,float16,0,0.048207998275756836
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,float16,0,0.045824001232783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.043791999419530235
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,float16,0,0.04781866570313772
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,fp8,0,0.04370133578777313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,float16,0,0.04781333108743032
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.04347200194994608
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,float16,0,0.04717866579691569
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,float16,0,0.05796800057093302
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.043552001317342125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,float16,0,0.04779199759165446
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,fp8,0,0.043477331598599754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,float16,0,0.04582933088143667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.04376000165939331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,float16,0,0.09190932909647624
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,float16,0,0.09140800436337788
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,0.08117333551247914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,float16,0,0.1572106679280599
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,0.08102400104204814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,float16,0,0.0942186713218689
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,fp8,0,0.08246933420499165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,float16,0,0.0580213318268458
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.05292266607284546
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,float16,0,0.06010666489601135
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,fp8,0,0.05236266553401947
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,float16,0,0.0580213318268458
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,float16,0,0.06724800169467926
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.05187733471393585
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,float16,0,0.03758399933576584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.03349333256483078
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,float16,0,0.037776000797748566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,fp8,0,0.03350399931271871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.03364799916744232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,float16,0,0.03562133262554804
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.033285332222779594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,float16,0,0.03544000039498011
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.04178133110205332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,fp8,0,0.03350933392842611
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,float16,0,0.03489600121974945
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.03328000009059906
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,float16,0,0.03472533325354258
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.03129599988460541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,float16,0,0.03341866781314214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,fp8,0,0.032229334115982056
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,float16,0,0.0352960005402565
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.031370667119820915
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,float16,0,0.03283733377854029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.031504000226656594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,float16,0,0.033258666594823204
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.05167999863624573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,fp8,0,0.03129599988460541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,float16,0,0.033173332611719765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.03141866624355316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,float16,0,0.0621973325808843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,fp8,0,0.05202666421731313
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.055776000022888184
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,float16,0,0.058703998724619545
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,float16,0,0.10309333602587382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,fp8,0,0.09322133660316467
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,float16,0,0.06012799839178721
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,fp8,0,0.13006933530171713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,float16,0,0.04190933207670847
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.03534399966398875
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,float16,0,0.04168533285458883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,fp8,0,0.03554133325815201
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,float16,0,0.041759997606277466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,float16,0,0.02985599885384242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.025477332373460133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,float16,0,0.030421334008375805
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.03526933242877325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,float16,0,0.029909332593282063
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.02740799884001414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.02741866558790207
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,float16,0,0.027056001126766205
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,fp8,0,0.026954665780067444
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,float16,0,0.026928000152111053
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,float16,0,0.028789333999156952
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.027317332724730175
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,float16,0,0.027061333258946735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.027258666853109997
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,float16,0,0.027024000883102417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,fp8,0,0.02701333413521449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,float16,0,0.02900800108909607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.02493866781393687
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,float16,0,0.027136000494162243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,float16,0,0.028368001182874043
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.025045332809289295
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,fp8,0,0.02499199906984965
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,float16,0,0.02716800073782603
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.027285332481066387
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,float16,0,0.06035199761390686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.05385066568851471
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,fp8,0,0.08684800068537395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,float16,0,0.10307199756304423
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,float16,0,0.060415998101234436
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,fp8,0,0.053898667295773826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,float16,0,0.061039999127388
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,float16,0,0.03823466598987579
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.051914667089780174
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.03452266752719879
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,fp8,0,0.03486400097608566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.03449599941571554
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,float16,0,0.0373279998699824
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,float16,0,0.035877334574858345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,fp8,0,0.02740799884001414
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,float16,0,0.02332266668478648
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,float16,0,0.023376000424226124
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.022170667846997578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,fp8,0,0.021221332252025604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,float16,0,0.02312533309062322
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.02102400114138921
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,fp8,0,0.021194666624069214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.02141333371400833
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,float16,0,0.02306666721900304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,float16,0,0.03766400118668874
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,float16,0,0.021290667355060577
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.020154666155576706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,float16,0,0.021162666380405426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.02086399992307027
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,float16,0,0.021210665504137676
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,float16,0,0.02290133386850357
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.021125334004561108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,float16,0,0.02102400114138921
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,fp8,0,0.020874666670958202
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,float16,0,0.021290667355060577
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.021269333859284718
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,float16,0,0.02120000123977661
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.01934933289885521
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,float16,0,0.021312000850836437
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,fp8,0,0.019199999670187633
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,float16,0,0.021530665457248688
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.021168000996112823
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,float16,0,0.043893332282702126
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.038975998759269714
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.054117331902186074
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,float16,0,0.07517333328723907
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,fp8,0,0.06635199983914693
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,fp8,0,0.037871999045213066
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,float16,0,0.04372799893220266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.039359999199708305
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,float16,0,0.027477333943049114
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.025381334125995636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,float16,0,0.02735466758410136
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,float16,0,0.022789334257443745
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,float16,0,0.023354666928450268
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.024661332368850708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,float16,0,0.01704000060757001
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,float16,0,0.028175999720891316
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.015013333410024643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,float16,0,0.016805333395799
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.01684800038735072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,float16,0,0.01509333277742068
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.015135999768972397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,float16,0,0.016869333883126576
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,float16,0,0.016842667013406754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,float16,0,0.01504533365368843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.015135999768972397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,float16,0,0.015013333410024643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,fp8,0,0.014848000059525171
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,float16,0,0.015135999768972397
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.014917333920796713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,float16,0,0.015370666980743408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.014762666076421738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,float16,0,0.014959999670584997
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,fp8,0,0.014725333700577417
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.014805333067973455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.014698666830857595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,float16,0,0.014981333166360855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,fp8,0,0.014730667074521383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,float16,0,0.01522133375207583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.014767999450365702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,float16,0,0.015130666395028433
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.014906667172908783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,float16,0,0.015168000012636185
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,fp8,0,0.014752000570297241
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.01470400020480156
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,fp8,0,0.02526933451493581
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,float16,0,0.036271999279658
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,float16,0,0.017077332983414333
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.01492799942692121
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,float16,0,0.02091199904680252
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,float16,0,0.015754666179418564
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,fp8,0,0.03161066770553589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,float16,0,0.015002666662136713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,float16,0,0.021301334102948506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,float16,0,0.04390933116277059
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,fp8,0,0.019066666563351948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.019248000035683315
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,float16,0,0.021146667500336964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,float16,0,0.014789332946141561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,float16,0,0.014762666076421738
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.013199999928474426
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,float16,0,0.015103999525308609
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,float16,0,0.012997332960367203
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,float16,0,0.012981332838535309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,float16,0,0.013072000195582708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,float16,0,0.013088000317414602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,float16,0,0.013007999708255133
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,float16,0,0.012821332861979803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,float16,0,0.013034666577974955
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,float16,0,0.012949333836634954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.01267733300725619
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,float16,0,0.012639999389648438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,fp8,0,0.012725333372751871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,float16,0,0.013077333569526672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,float16,0,0.012746666868527731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,float16,0,0.012789333860079447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,fp8,0,0.012005332857370377
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,float16,0,0.012928000340859095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,float16,0,0.012709333250919977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.01267733300725619
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,fp8,0,0.011674666156371435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,float16,0,0.012837332983811697
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,float16,0,0.012954667210578918
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,float16,0,0.012698666503032049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,float16,0,0.012944000462690989
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.018858666221300762
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,float16,0,0.015008000036080679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,float16,0,0.023455999791622162
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,fp8,0,0.022175999979178112
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,float16,0,0.015050667027632395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,fp8,0,0.014698666830857595
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,float16,0,0.014933332800865173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.01488000030318896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.010741333166758219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,float16,0,0.010879999647537867
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,float16,0,0.010602666685978571
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,float16,0,0.011141333729028702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.010847999403874079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,float16,0,0.010586666564146677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,fp8,0,0.010453333457310995
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.010911999891201654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,float16,0,0.010965333630641302
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,float16,0,0.010640000303586325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,float16,0,0.01098666712641716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,float16,0,0.010549332946538925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,float16,0,0.012768000364303589
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,float16,0,0.010672000547250112
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,fp8,0,0.009114666531483332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,float16,0,0.010751999914646149
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,float16,0,0.009568000212311745
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,float16,0,0.00973866693675518
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,fp8,0,0.00915733352303505
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,float16,0,0.010608000059922537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,float16,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,float16,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,fp8,0,0.009269333134094873
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,float16,0,0.009056000038981438
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.00916800027092298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,float16,0,0.009029333169261614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,float16,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.009173333023985228
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,float16,0,0.012981332838535309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,float16,0,0.010853332777818045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,float16,0,0.020831999679406483
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,float16,0,0.014576000471909841
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,float16,0,0.014853333433469137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,float16,0,0.011018666128317514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,float16,0,0.0107893335322539
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,float16,0,0.010853332777818045
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,fp8,0,0.010288000106811523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,float16,0,0.010586666564146677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,float16,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.010645333677530289
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,float16,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,float16,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,float16,0,0.009136000027259191
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,float16,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,float16,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,float16,0,0.00922133338948091
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,float16,0,0.009072000160813332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.008656000097592672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,float16,0,0.00914666677514712
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,float16,0,0.008714666590094566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,float16,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,float16,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,float16,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,fp8,0,0.008645333349704742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,float16,0,0.008746666833758354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,float16,0,0.013167999684810638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,fp8,0,0.019066666563351948
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,float16,0,0.013061333447694778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,float16,0,0.013088000317414602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,float16,0,0.010949333508809408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.012666666259368261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,float16,0,0.010922666639089584
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.008586666857202848
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,float16,0,0.010687999427318573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,float16,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.009136000027259191
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,float16,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.008821333448092142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,float16,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,float16,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,float16,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,fp8,0,0.008858666444818178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,float16,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,float16,0,0.008736000085870424
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,float16,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,float16,0,0.008629333227872849
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,float16,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,float16,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,float16,0,0.008682666967312494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.008538666491707167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,float16,0,0.009077333534757296
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,float16,0,0.020047999918460846
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,fp8,0,0.008687999720374743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,float16,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,float16,0,0.00879466657837232
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,float16,0,0.00860799973209699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.008799999952316284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,float16,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,float16,0,0.018954666952292126
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,fp8,0,0.017840000490347546
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,float16,0,0.013066666821638743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,float16,0,0.012858666479587555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.008853333070874214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,float16,0,0.010698666175206503
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,float16,0,0.012896000097195307
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.009216000015536943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,float16,0,0.01090666651725769
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,float16,0,0.010725333044926325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.008832000195980072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,float16,0,0.009061333412925402
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.008602666358153025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,float16,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,float16,0,0.008746666833758354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,float16,0,0.008703999842206636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,float16,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,float16,0,0.008725333337982496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,float16,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.008602666358153025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,float16,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,fp8,0,0.00860799973209699
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,float16,0,0.00874133345981439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,float16,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,float16,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,float16,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.008650666723648706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,float16,0,0.008656000097592672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,float16,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,float16,0,0.008709333216150602
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.008661333471536636
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,float16,0,0.008842666943868002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,0,0.009008000294367472
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,0,0.008576000109314919
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,0,0.00867733359336853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,0,0.006949333474040031
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,0,0.008303999900817871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.006911999856432279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,0,0.006810666372378667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,0,0.00721066693464915
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.0069333333522081375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,0,0.007045333584149678
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,0,0.006805333619316419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,0,0.006762666627764702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.006757333253820737
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,0,0.006954666847983996
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,0,0.006842666616042455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,0,0.006858666737874349
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.006773333375652631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,0,0.0069333333522081375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,fp8,0,0.008602666358153025
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.0068800002336502075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,0,0.0068800002336502075
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,0,0.0069866664707660675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,float16,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.006954666847983996
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,0,0.006805333619316419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,0,0.006730666384100914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,0,0.00706666645904382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,0,0.00684799998998642
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,0,0.006741333131988843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.0069333333522081375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,0,0.006842666616042455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.006831999868154526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,0,0.006842666616042455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.006890666360656421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,0,0.006842666616042455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,0,0.006613333399097125
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,float16,0,0.32149867216746014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,0,0.0069386667261521024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,0.28914133707682294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,float16,0,0.3236959973971049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.0069386667261521024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,0.288917342821757
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,float16,0,0.21061867475509644
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,0.18735466400782266
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,float16,0,0.19787200291951498
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,0.18948266903559366
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,float16,0,0.1946880022684733
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.1874613364537557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,float16,0,0.2060426672299703
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.17848533391952515
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,float16,0,0.20517333348592123
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,0,0.006725333631038666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,0.1842026710510254
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,float16,0,0.2042400042215983
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.0069866664707660675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,0.1771626671155294
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,float16,0,0.16028799613316855
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,0.14452800154685974
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,float16,0,0.15921599666277567
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,0.1361120045185089
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,0,0.006714666883150737
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.13594667116800943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,float16,0,0.1565013329188029
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,float16,0,0.15060266852378845
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,0.12772267063458762
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,float16,0,0.13893866539001465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,0.12775466839472452
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,float16,0,0.1339040001233419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.12238400181134541
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.11939733227094014
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,float16,0,0.1236853301525116
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.11538133025169373
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,float16,0,0.13194666306177774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.11507733662923177
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,float16,0,0.1718453367551168
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,float16,0,0.16145066420237222
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,0.158160001039505
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,float16,0,0.11293333768844604
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.1013706624507904
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,float16,0,0.10481599966684978
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.09573866923650105
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,float16,0,0.10964799920717876
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.0941546658674876
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,float16,0,0.10127466917037964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.09373866518338521
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,float16,0,0.10872000455856323
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.09286933143933614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.09289066990216573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,float16,0,0.10692800084749858
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,0.10103467106819153
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,float16,0,0.11589866876602173
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,0.10187733173370361
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,float16,0,0.08052266637484233
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.07885333398977916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,float16,0,0.08054933448632558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.0726506660381953
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,float16,0,0.07829333345095317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.07743466893831889
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,float16,0,0.07851199805736542
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.07210666437943776
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,float16,0,0.07827199995517731
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.07640533149242401
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,float16,0,0.07829333345095317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.07239999870459239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,float16,0,0.09963732957839966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,float16,0,0.15624533096949259
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.08916266759236653
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.14357866843541464
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.08669867118199666
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,float16,0,0.06292266647020976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.058101331194241844
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,float16,0,0.06326933205127716
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,float16,0,0.12488533059755962
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.057962665955225624
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,float16,0,0.06012799839178721
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.055685331424077354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,float16,0,0.06159999966621399
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.055813332398732506
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.053946668903032936
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,0.14593600233395895
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.05479466418425242
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,float16,0,0.05602133274078369
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.05197333296140035
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,float16,0,0.05806399881839752
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.053930665055910744
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,float16,0,0.06696000198523204
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.05600533386071523
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,float16,0,0.06225599845250448
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.05993066728115082
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,float16,0,0.10103467106819153
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,float16,0,0.049914668003718056
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.045040001471837364
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,float16,0,0.04783466458320618
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.04543466866016388
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,float16,0,0.043968002001444496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.04354666670163473
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,float16,0,0.04785066843032837
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.0433599998553594
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,float16,0,0.047637333472569786
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.04366933306058248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,float16,0,0.04778666794300079
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.04340266684691111
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,float16,0,0.04582933088143667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.04160533348719279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,float16,0,0.0476693312327067
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.043509334325790405
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,float16,0,0.0595413347085317
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.0518506666024526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.051818668842315674
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,float16,0,0.037690666814645134
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.03362133353948593
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,float16,0,0.037434667348861694
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.03356799980004629
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,float16,0,0.09957333405812581
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,float16,0,0.03563733398914337
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.032245332996050514
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,float16,0,0.035589332381884255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,float16,0,0.03482666611671448
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.031343999008337654
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.03324799984693527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,float16,0,0.03533333291610082
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.031632001201311745
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,float16,0,0.0334346666932106
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.029802667597929638
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,float16,0,0.059445331494013466
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.029968000948429108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.02940266579389572
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,float16,0,0.05996266504128774
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.03044266750415166
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,float16,0,0.04171200096607208
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,float16,0,0.041738669077555336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.03737599899371465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,float16,0,0.029658667743206024
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.027327999472618103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,float16,0,0.029333333174387615
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.02738133321205775
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,float16,0,0.029258665939172108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.027034667630990345
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,float16,0,0.027647999425729115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.027232001225153606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,float16,0,0.028143999477227528
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.02699733277161916
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,float16,0,0.05909866591294607
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,float16,0,0.027263998985290527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.026975999275843304
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.02731200059254964
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,float16,0,0.0288426677385966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,float16,0,0.027119999130566914
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.027232001225153606
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,float16,0,0.027386667827765148
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.025008000433444977
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,float16,0,0.02733866622050603
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.027002667387326557
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,float16,0,0.033914667864640556
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,float16,0,0.037658666570981346
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.032831999162832894
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,float16,0,0.037471999724706016
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.03335466732581457
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,float16,0,0.025226667523384094
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,float16,0,0.03388266762097677
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.022309333086013794
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.02309333284695943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.03748800108830134
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.021013334393501282
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,float16,0,0.022255999346574146
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,float16,0,0.02125866711139679
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.020954666038354237
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,float16,0,0.021136000752449036
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,float16,0,0.02233600119749705
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.021194666624069214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.019578666736682255
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,float16,0,0.02141333371400833
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.020949333906173706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,float16,0,0.02130666623512904
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,float16,0,0.021125334004561108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.020938667158285778
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,float16,0,0.02146133283774058
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.02090666691462199
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,float16,0,0.02770666778087616
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.025253333151340485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,float16,0,0.02752533306678136
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,float16,0,0.017477333545684814
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.016778666526079178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.025301332275072735
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,float16,0,0.017349333812793095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,float16,0,0.015050667027632395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.016832000265518825
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.014975999792416891
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,float16,0,0.03350399931271871
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.014815999815861383
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,float16,0,0.015317333241303762
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,float16,0,0.015306666493415833
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,float16,0,0.02333866556485494
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,float16,0,0.015013333410024643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,float16,0,0.014970666418472925
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.019215999792019527
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,float16,0,0.02143466720978419
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.015002666662136713
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.014805333067973455
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,float16,0,0.01532799998919169
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,float16,0,0.021002667645613354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,float16,0,0.015109332899252573
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,float16,0,0.015141333142916361
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.014837333311637243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,float16,0,0.014943999548753103
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.015178666760524115
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,float16,0,0.01504533365368843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.014789332946141561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,float16,0,0.015029333531856537
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,float16,0,0.012928000340859095
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,float16,0,0.012944000462690989
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.01267733300725619
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,float16,0,0.012917333592971167
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,float16,0,0.01312000056107839
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.012629333883523941
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,float16,0,0.01522133375207583
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,float16,0,0.012879999975363413
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,float16,0,0.012773333738247553
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,float16,0,0.012858666479587555
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.014938666174809137
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,float16,0,0.013130666067202887
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.012650666137536367
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,float16,0,0.01301866645614306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,float16,0,0.012655999511480331
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,float16,0,0.01504533365368843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,float16,0,0.012805332740147909
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.01166933278242747
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,float16,0,0.012874666601419449
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.012671999633312225
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,float16,0,0.012698666503032049
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,float16,0,0.010981333752473196
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.010863999525705973
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,float16,0,0.01091733326514562
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.010602666685978571
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,float16,0,0.010949333508809408
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,float16,0,0.010591999938090643
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.009370666618148485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,float16,0,0.010618666807810465
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,float16,0,0.012890666723251343
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,float16,0,0.01099733387430509
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,float16,0,0.009914666414260864
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,float16,0,0.010485333700974783
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.009072000160813332
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,float16,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,float16,0,0.00874133345981439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,float16,0,0.0102186668664217
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,float16,0,0.009370666618148485
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.00916800027092298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,float16,0,0.009984000275532404
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,float16,0,0.008767999708652496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,float16,0,0.009850666547815004
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.01267733300725619
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,float16,0,0.010640000303586325
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.009957333405812582
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,float16,0,0.010960000256697336
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,float16,0,0.010661333799362183
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,float16,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,float16,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.008853333070874214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,float16,0,0.008672000219424566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.009082666908701261
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,float16,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,float16,0,0.008976000050703684
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,float16,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,float16,0,0.009109333157539368
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,float16,0,0.009178666397929192
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,float16,0,0.00897066667675972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,float16,0,0.00895999992887179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,float16,0,0.010928000013033548
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,float16,0,0.01089599976936976
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.009706666693091393
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,float16,0,0.009119999905427298
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,float16,0,0.008698666468262672
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.008767999708652496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,float16,0,0.008874666566650072
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.008634666601816813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.008821333448092142
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,float16,0,0.008826666822036108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,float16,0,0.009125333279371262
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.008725333337982496
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,float16,0,0.008693333094318708
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,float16,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.008714666590094566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,float16,0,0.008687999720374743
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,float16,0,0.008863999818762144
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.008634666601816813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,float16,0,0.008885333314538002
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,float16,0,0.008634666601816813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,float16,0,0.010565333068370819
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,float16,0,0.010629333555698395
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,float16,0,0.00903466654320558
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,float16,0,0.008789333204428354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,float16,0,0.008912000184257826
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,float16,0,0.009152000149091085
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,float16,0,0.00877333308259646
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,float16,0,0.008757333581646284
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,float16,0,0.008634666601816813
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,float16,0,0.008890666688481966
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.008650666723648706
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,float16,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.008805333326260248
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,float16,0,0.009045333291093508
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,float16,0,0.008752000207702318
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.008496000121037165
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,float16,0,0.00874133345981439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.00874133345981439
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,0,0.008645333349704742
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.0069333333522081375
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,0,0.008522666369875273
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,0,0.007082666580875714
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.006741333131988843
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.006858666737874349
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,0,0.006751999879876773
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.006746666505932808
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,0,0.006720000257094701
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,0,0.0069866664707660675
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.007125333572427432
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,float16,0,0.009216000015536943
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,0,0.00706666645904382
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,float16,0,0.008592000231146812
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.007114666824539502
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.006768000001708667
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,0,0.006762666627764702
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,0,0.006853333363930385
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.00690133310854435
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,0,0.006917333230376244
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.006826666494210561
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,float16,0,0.008672000219424566
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.006874666859706243
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,0,0.007045333584149678
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.006911999856432279
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,0,0.00696000022192796
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,0,0.006890666360656421
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.006821333120266597
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,0,0.006773333375652631
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.00666133314371109
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.006831999868154526
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.008746666833758354
VLLM,0.19.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,0,0.006895999734600385
