framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,fp8,0,22.577232360839844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,float16,0,31.045193990071613
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,float16,0,17.267386118570965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,fp8,0,13.098346710205078
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,fp8,0,11.302213033040365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,float16,0,15.647023518880209
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,float16,0,16.02387237548828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,fp8,0,11.311232248942057
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,fp8,0,11.767147064208984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,float16,0,16.34332275390625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,float16,0,16.30029805501302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,fp8,0,11.81537119547526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,float16,0,8.159168243408203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,fp8,0,6.497477213541667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,float16,0,7.3805281321207685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,fp8,0,5.457888285319011
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,float16,0,7.471792221069336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,fp8,0,5.575152079264323
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,float16,0,30.522672017415363
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,float16,0,7.739957173665364
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,float16,0,7.746218363444011
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,fp8,0,5.873104095458984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,fp8,0,5.762879689534505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,float16,0,3.6010878880818686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,fp8,0,3.3306665420532227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,float16,0,3.848533312479655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,fp8,0,3.142746607462565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,float16,0,3.516240119934082
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,fp8,0,2.929504076639811
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,float16,0,3.756490707397461
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,fp8,0,3.2081174850463867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,float16,0,3.6513598759969077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,fp8,0,3.1836318969726562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,fp8,0,23.103253682454426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,fp8,0,12.808639526367188
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,float16,0,17.75060272216797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,float16,0,31.37518310546875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,float16,0,18.11282730102539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,fp8,0,22.911376953125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,fp8,0,13.352896372477213
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,fp8,0,13.59121068318685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,float16,0,18.258565266927082
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,fp8,0,13.85870361328125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,fp8,0,7.798709233601888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,float16,0,9.782682418823242
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,float16,0,18.396277109781902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,float16,0,8.860341389973959
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,fp8,0,6.420341491699219
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,fp8,0,6.345040003458659
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,float16,0,8.815141042073568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,fp8,0,22.84314219156901
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,fp8,0,6.684517542521159
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,float16,0,8.518074671427408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,fp8,0,6.654426574707031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,fp8,0,3.1924425760904946
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,float16,0,9.18669319152832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,float16,0,4.003871917724609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,float16,0,4.630394617716472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,float16,0,4.001119931538899
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,fp8,0,4.0457814534505205
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,float16,0,31.27759043375651
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,fp8,0,3.229946772257487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,float16,0,4.283493359883626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,fp8,0,3.779754638671875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,fp8,0,1.6382026672363281
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,float16,0,4.400586764017741
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,float16,0,2.3068854014078775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,fp8,0,2.030080000559489
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,float16,0,2.081434726715088
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,fp8,0,2.285663922627767
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,fp8,0,1.721402645111084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,fp8,0,3.436527887980143
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,fp8,0,1.6854400634765625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,float16,0,2.04967466990153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,float16,0,2.099653402964274
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,fp8,0,9.254074732462565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,float16,0,12.588794708251953
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,float16,0,2.3911840120951333
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,float16,0,12.690869649251303
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,fp8,0,9.124469121297201
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,fp8,0,9.342720031738281
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,float16,0,6.165066401163737
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,fp8,0,4.505205472310384
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,float16,0,12.991920471191406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,fp8,0,5.720122655232747
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,float16,0,12.720565795898438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,float16,0,5.967967987060547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,fp8,0,4.756613413492839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,fp8,0,4.8351999918619795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,float16,0,6.117408116658528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,fp8,0,4.867978731791179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,float16,0,6.832549413045247
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,float16,0,2.809685389200846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,float16,0,3.2811571756998696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,float16,0,2.823967933654785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,fp8,0,2.2882560094197593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,float16,0,2.9276533126831055
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,fp8,0,9.476405461629232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,fp8,0,2.382981300354004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,float16,0,2.9084800084431968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,fp8,0,2.501039981842041
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,float16,0,1.4283893903096516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,fp8,0,2.914250691731771
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,float16,0,1.6730186144510906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,fp8,0,1.1717173258463542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,float16,0,1.443376064300537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,fp8,0,1.1871306896209717
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,fp8,0,2.4836907386779785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,fp8,0,1.239349365234375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,float16,0,1.4864746729532878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,fp8,0,1.2504853407541912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,fp8,0,1.4986400604248047
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,float16,0,6.067424138387044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,fp8,0,11.895008087158203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,fp8,0,12.2598876953125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,float16,0,1.4919999440511067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,float16,0,16.29208501180013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,float16,0,16.287562052408855
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,fp8,0,12.361434936523438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,float16,0,16.976484934488933
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,fp8,0,5.911109288533528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,float16,0,7.775311787923177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,fp8,0,7.8226668039957685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,fp8,0,12.700677235921225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,float16,0,8.048826853434244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,fp8,0,6.092421213785808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,float16,0,17.08771260579427
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,float16,0,8.514453252156576
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,fp8,0,6.319248199462891
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,float16,0,9.362149556477865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,float16,0,4.412869453430176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,float16,0,3.68829345703125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,fp8,0,3.1507787704467773
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,float16,0,3.73362668355306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,fp8,0,6.233797073364258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,fp8,0,2.971738815307617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,float16,0,3.8135627110799155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,fp8,0,3.374858538309733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,float16,0,1.8473013242085774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,float16,0,2.218133290608724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,fp8,0,1.510240077972412
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,fp8,0,2.0205012957255044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,float16,0,1.866341272989909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,fp8,0,1.538416067759196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,fp8,0,3.230997403462728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,float16,0,7.835018793741862
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,float16,0,1.9292532602945964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,fp8,0,3.9676640828450522
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,fp8,0,1.6049866676330566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,float16,0,0.9510719776153564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,fp8,0,0.7869066397349039
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,float16,0,1.1532800197601318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,fp8,0,0.839792013168335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,float16,0,3.80184014638265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,float16,0,1.0052639643351238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,fp8,0,0.795525312423706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,float16,0,1.0018880367279053
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,fp8,0,1.6264692942301433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,fp8,0,0.8520053227742513
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,float16,0,9.301082611083984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,fp8,0,1.0441760222117107
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,float16,0,1.9727946917215984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,fp8,0,7.019850413004558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,float16,0,9.426746368408203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,float16,0,9.834789276123047
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,fp8,0,7.401248296101888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,float16,0,9.816234588623047
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,fp8,0,7.293834686279297
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,float16,0,5.543994903564453
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,float16,0,4.251461346944173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,fp8,0,4.986000061035156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,float16,0,4.272666613260905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,fp8,0,3.4564746220906577
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,fp8,0,3.9715894063313804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,float16,0,4.606346766153972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,float16,0,4.506970723470052
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,fp8,0,3.401466687520345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,fp8,0,3.9234933853149414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,fp8,0,1.874293327331543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,float16,0,2.1911840438842773
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,float16,0,2.185365358988444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,fp8,0,6.84982426961263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,fp8,0,2.4995786348978677
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,fp8,0,1.7659626007080078
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,float16,0,2.271205266316732
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,fp8,0,1.92413330078125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,float16,0,2.2934773763020835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,fp8,0,1.8800427118937175
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,float16,0,1.1068480014801025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,fp8,0,0.9110933144887289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,fp8,0,1.2789759635925293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,float16,0,1.155941327412923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,float16,0,2.739919980367025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,float16,0,0.9609653155008951
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,float16,0,1.1475199858347576
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,fp8,0,1.0058773358662922
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,float16,0,0.5602826674779257
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,fp8,0,0.6713706652323405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,fp8,0,0.8972853024800619
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,float16,0,1.3570559819539387
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,float16,0,0.6096373399098715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,fp8,0,0.5204266707102457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,fp8,0,0.47815465927124023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,float16,0,0.6138079961140951
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,float16,0,0.729312022527059
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,fp8,0,0.5183039903640747
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,float16,0,1.1170612970987956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,fp8,0,0.9811466534932455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,float16,0,0.5705013275146484
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,fp8,0,6.477680206298828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,fp8,0,0.4872320095698039
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,float16,0,8.963781356811523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,float16,0,9.225146611531576
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,fp8,0,6.676080067952474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,fp8,0,7.063690821329753
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,float16,0,9.110506693522135
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,float16,0,9.100442886352539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,float16,0,3.9306933085123696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,fp8,0,3.2198559443155923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,fp8,0,3.3044427235921225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,float16,0,4.748655954996745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,fp8,0,3.924821217854818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,float16,0,4.443386713663737
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,fp8,0,3.706437428792318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,fp8,0,7.164554595947266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,float16,0,5.63920529683431
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,float16,0,2.1140693028767905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,fp8,0,1.6298880577087402
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,float16,0,2.795578638712565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,fp8,0,2.6623093287150064
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,fp8,0,1.672986666361491
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,fp8,0,1.8432586987813313
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,fp8,0,1.8816800117492676
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,float16,0,4.130330721537272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,float16,0,1.41593599319458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,float16,0,1.0045973459879558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,fp8,0,0.8361546993255615
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,fp8,0,0.8606560230255127
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,float16,0,2.226367950439453
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,float16,0,1.102725346883138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,fp8,0,0.9629759788513184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,float16,0,2.2042880058288574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,fp8,0,0.9384960333506266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,fp8,0,5.309130668640137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,fp8,0,1.3561280568440754
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,fp8,0,0.6994773546854655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,float16,0,0.5298133293787638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,fp8,0,0.4443519910176595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,float16,0,1.0607893466949463
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,fp8,0,0.45628265539805096
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,float16,0,0.5678720076878866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,float16,0,0.5734719832738241
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,float16,0,1.1206400394439697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,float16,0,0.279968003431956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,float16,0,0.5165919860204061
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,fp8,0,0.2420533299446106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,fp8,0,0.37644799550374347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,fp8,0,0.24680533011754355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,fp8,0,0.2634933392206828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,float16,0,0.29979199171066284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,fp8,0,0.2697333296140035
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,float16,0,0.40689067045847577
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,float16,0,0.2839733362197876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,float16,0,0.7155466874440511
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,float16,0,0.30345600843429565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,float16,0,4.907477378845215
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,fp8,0,0.5038613478342692
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,fp8,0,3.8459040323893228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,float16,0,4.967359860738118
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,fp8,0,3.9615465799967446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,float16,0,5.500458399454753
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,fp8,0,4.535690625508626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,fp8,0,0.5050293207168579
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,fp8,0,4.445919990539551
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,float16,0,5.381210962931315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,fp8,0,3.4792000452677407
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,fp8,0,1.9550719261169434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,float16,0,3.4844373067220054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,float16,0,2.3970932960510254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,fp8,0,2.007695992787679
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,float16,0,2.697178522745768
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,float16,0,2.0274880727132163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,fp8,0,2.197360038757324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,float16,0,2.682058652242025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,float16,0,1.210319995880127
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,fp8,0,2.327002684275309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,fp8,0,1.7667786280314128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,fp8,0,1.0210453669230144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,float16,0,1.750282605489095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,float16,0,1.3524692853291829
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,fp8,0,1.1613600254058838
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,float16,0,0.6159573396046957
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,float16,0,0.8821973005930582
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,fp8,0,0.9923306306203207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,fp8,0,0.5133066574732462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,float16,0,2.5082880655924478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,fp8,0,0.5307146708170573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,fp8,0,1.1653546492258708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,fp8,0,0.6094826857248942
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,float16,0,0.6678933302561442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,fp8,0,0.6034133434295654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,fp8,0,0.8989600340525309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,float16,0,0.5043466488520304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,float16,0,1.2106613318125408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,float16,0,0.31988799571990967
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,float16,0,0.3252906600634257
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,fp8,0,0.2850400010744731
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,float16,0,1.3154239654541016
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,fp8,0,0.3163359959920247
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,float16,0,0.3513866662979126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,float16,0,0.6371039946873983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,fp8,0,0.2783520023028056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,float16,0,0.17575999101003012
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,float16,0,0.3514133294423421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,fp8,0,0.25916266441345215
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,float16,0,0.18092799186706543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,fp8,0,0.31804800033569336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,float16,0,0.19133333365122476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,fp8,0,0.17124267419179282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,float16,0,0.19134400288263956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,fp8,0,0.17325333754221597
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,fp8,0,0.46990398565928143
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,fp8,0,0.15643733739852905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,fp8,0,3.8171412150065103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,fp8,0,0.159061332543691
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,float16,0,4.7374881108601885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,fp8,0,3.9873971939086914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,float16,0,0.6727306842803955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,float16,0,5.6180159250895185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,fp8,0,4.767834663391113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,float16,0,5.426885604858398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,fp8,0,4.662970542907715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,float16,0,4.9043839772542315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,fp8,0,1.9265119234720867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,fp8,0,3.971482594807943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,float16,0,0.27053866783777875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,float16,0,2.3573385874430337
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,float16,0,2.6974026362101235
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,fp8,0,2.30293862024943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,float16,0,2.7480427424112954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,fp8,0,2.3582666714986167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,float16,0,2.372752030690511
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,float16,0,3.8051252365112305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,fp8,0,2.003215948740641
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,fp8,0,1.021888017654419
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,float16,0,1.198805332183838
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,fp8,0,1.1598347028096516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,float16,0,1.3834826151529949
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,fp8,0,1.1742080052693684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,float16,0,0.5875360171000162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,float16,0,0.9579520225524902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,fp8,0,0.5029493172963461
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,float16,0,1.3328746954600017
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,fp8,0,0.5245920022328695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,float16,0,1.1848373413085938
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,float16,0,0.6747519969940186
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,fp8,0,2.0071093241373696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,fp8,0,1.0165013472239177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,float16,0,0.6192160050074259
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,float16,0,0.679370641708374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,float16,0,0.30591466029485065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,float16,0,0.4891039927800496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,fp8,0,0.2709760069847107
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,float16,0,0.3099626700083415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,float16,0,0.3511626720428467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,fp8,0,0.3303520083427429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,fp8,0,0.3270026644070943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,fp8,0,0.6297119855880737
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,fp8,0,0.5236159960428873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,float16,0,0.2877279917399089
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,float16,0,0.16906134287516275
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,fp8,0,0.2888373335202535
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,float16,0,1.905669371287028
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,float16,0,0.1731520096460978
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,fp8,0,0.15552533666292825
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,float16,0,0.19604800144831339
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,float16,0,0.19192532698313394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,fp8,0,0.17922665675481161
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,fp8,0,0.15025599797566733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,fp8,0,0.2821333408355713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,fp8,0,0.9955946604410807
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,float16,0,0.09689600268999736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,float16,0,0.3535840113957723
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,float16,0,0.09814400474230449
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,float16,0,0.10392000277837117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,float16,0,0.10645332932472229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,fp8,0,0.09754666686058044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,fp8,0,0.6038879950841268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,fp8,0,0.08859200278917949
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,fp8,0,0.0883679986000061
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,fp8,0,0.09662399689356486
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,float16,0,2.810090700785319
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,fp8,0,2.513173262278239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,float16,0,2.9345973332722983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,float16,0,0.15982400377591452
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,float16,0,3.45413875579834
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,fp8,0,0.1750826636950175
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,fp8,0,3.1962080001831055
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,float16,0,1.4562560717264812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,fp8,0,3.046229362487793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,float16,0,2.5345706939697266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,fp8,0,2.7526400883992515
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,fp8,0,2.3845973014831543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,float16,0,1.5052480697631836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,fp8,0,1.2719519933064778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,fp8,0,1.461525281270345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,float16,0,1.6948960622151692
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,float16,0,3.3143094380696616
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,fp8,0,1.5022026697794597
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,fp8,0,1.2137119770050049
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,fp8,0,0.16081600387891135
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,float16,0,1.2842933336893718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,float16,0,0.7365813255310059
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,fp8,0,1.382186730702718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,fp8,0,0.6520693302154541
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,fp8,0,0.7817173004150391
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,float16,0,1.7095200220743816
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,float16,0,0.8405226866404215
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,float16,0,0.3675626516342163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,float16,0,0.7096800009409586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,fp8,0,0.32264000177383423
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,fp8,0,0.6147840023040771
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,fp8,0,0.7046453158060709
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,fp8,0,0.34144532680511475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,float16,0,0.4367520014444987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,float16,0,0.8753706614176432
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,fp8,0,0.4064319928487142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,fp8,0,0.7669920126597086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,float16,0,0.1991200049718221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,fp8,0,0.17845332622528076
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,fp8,0,0.36661334832509357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,float16,0,0.3743520180384318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,fp8,0,0.18524267276128134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,float16,0,0.2278133432070414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,fp8,0,0.2195626695950826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,fp8,0,0.4030239979426066
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,fp8,0,0.1973066727320353
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,float16,0,0.34761067231496173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,float16,0,0.68394668896993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,float16,0,0.11100266377131145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,float16,0,0.20497600237528482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,fp8,0,0.10292800267537434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,float16,0,0.2311306595802307
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,fp8,0,0.11397866408030193
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,float16,0,0.1293706695238749
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,fp8,0,0.11512000362078349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,float16,0,0.20304532845815024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,float16,0,0.11618133385976155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,fp8,0,0.11173333724339803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,float16,0,0.06674666702747345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,fp8,0,0.06037333110968272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,fp8,0,0.21757332483927408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,float16,0,0.436901330947876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,fp8,0,0.06819200019041698
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,fp8,0,0.06853333115577698
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,float16,0,0.06724266707897186
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,fp8,0,0.09913600484530131
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,fp8,0,0.06221333146095276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,float16,0,2.9738505681355796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,float16,0,0.0717493345340093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,fp8,0,2.520773410797119
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,fp8,0,2.691072146097819
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,float16,0,0.11371733744939168
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,float16,0,3.7941548029581704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,float16,0,0.0710506687561671
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,float16,0,1.4767732620239258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,float16,0,3.77347723642985
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,fp8,0,3.3542772928873696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,float16,0,3.055562655131022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,float16,0,2.9589920043945312
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,fp8,0,3.170192082722982
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,fp8,0,3.312368075052897
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,float16,0,1.8258934020996094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,fp8,0,1.622437318166097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,fp8,0,1.7438079516092937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,float16,0,1.8657013575236003
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,float16,0,0.12781866391499838
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,float16,0,1.573578675587972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,fp8,0,1.3590614000956218
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,float16,0,0.7654986381530762
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,fp8,0,1.6645387013753254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,float16,0,0.9279253482818604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,fp8,0,0.8237280050913492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,float16,0,0.9130187034606934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,fp8,0,0.8664267063140869
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,fp8,0,1.3076000213623047
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,float16,0,0.36987733840942383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,float16,0,1.4877066612243652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,float16,0,0.39046935240427655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,float16,0,0.7437919775644938
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,fp8,0,0.3641226689020793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,float16,0,0.46508800983428955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,fp8,0,0.46479467550913495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,float16,0,0.45536001523335773
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,float16,0,0.7521440188090006
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,fp8,0,0.4333920081456502
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,fp8,0,0.8432746728261312
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,fp8,0,0.6941973368326823
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,float16,0,0.20957867304484049
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,fp8,0,0.1918506622314453
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,fp8,0,0.4361813465754191
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,float16,0,0.2403306762377421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,fp8,0,0.24034132560094199
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,float16,0,0.24945600827534994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,fp8,0,0.6521866718928019
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,fp8,0,0.34030401706695557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,float16,0,0.38256533940633136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,float16,0,0.1106666624546051
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,fp8,0,0.10035733381907146
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,fp8,0,0.1037013332049052
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,float16,0,0.13380266229311624
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,fp8,0,0.24445333083470663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,float16,0,0.13586666186650595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,float16,0,0.2002026637395223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,float16,0,0.23258666197458902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,fp8,0,0.2291146715482076
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,float16,0,0.11102933684984843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,float16,0,0.06428800026575725
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,float16,0,0.06570666531721751
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,float16,0,0.0708000014225642
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,fp8,0,0.06807999809583028
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,fp8,0,0.18336532513300577
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,float16,0,0.07147199908892314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,fp8,0,0.06856533388296764
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,fp8,0,0.1241439978281657
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,fp8,0,0.058037335673967995
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,fp8,0,0.12552000085512796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,float16,0,0.041722665230433144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,float16,0,0.04167999823888143
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,fp8,0,0.03962666789690653
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,fp8,0,0.13082133730252585
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,float16,0,0.11548266808191936
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,float16,0,0.04548799991607666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,fp8,0,0.04324266811211904
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,fp8,0,0.060047999024391174
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,float16,0,0.06339733302593231
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,float16,0,0.044346665342648826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,float16,0,2.053391933441162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,fp8,0,1.8686985969543457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,float16,0,2.1962879498799643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,fp8,0,2.0314079920450845
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,fp8,0,0.037621334195137024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,fp8,0,2.4790186882019043
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,fp8,0,0.042992000778516136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,float16,0,2.7661120096842446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,fp8,0,0.9516479969024658
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,fp8,0,0.06439466774463654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,float16,0,1.0329973697662354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,float16,0,2.5633974075317383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,fp8,0,1.0296906630198162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,float16,0,1.112885316212972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,fp8,0,2.6220480600992837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,fp8,0,2.962554613749186
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,float16,0,1.4390239715576172
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,float16,0,1.476842721303304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,fp8,0,1.4189173380533855
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,float16,0,2.8765172958374023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,fp8,0,0.4840533336003621
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,float16,0,0.5692960023880005
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,fp8,0,0.5211893320083618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,fp8,0,0.6921493212381998
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,float16,0,0.5345333417256674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,float16,0,0.7018400033315023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,fp8,0,1.4917759895324707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,float16,0,1.2779573599497478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,fp8,0,0.7560213406880697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,float16,0,0.27586134274800617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,fp8,0,0.2550453344980876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,float16,0,0.2995679974555969
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,fp8,0,0.27297600110371906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,fp8,0,0.3655466636021932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,float16,0,0.7258186340332031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,float16,0,0.35491732756296795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,float16,0,0.14924800395965576
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,float16,0,0.3291200002034505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,float16,0,0.6403893232345581
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,fp8,0,0.38607466220855713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,fp8,0,1.3588107426961262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,float16,0,0.3749706745147705
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,float16,0,0.1604373355706533
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,fp8,0,0.19531200329462686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,fp8,0,0.686357339223226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,float16,0,0.1946293314297994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,float16,0,0.08362133304278056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,fp8,0,0.07437333464622498
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,float16,0,0.1731839974721273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,fp8,0,0.13682132959365845
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,float16,0,0.10614400108655293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,fp8,0,0.14999999602635702
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,fp8,0,0.09657599528630574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,float16,0,0.10719999670982361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,fp8,0,0.19533334175745645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,fp8,0,0.1018933355808258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,float16,0,0.046757335464159645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,fp8,0,0.04374399781227112
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,float16,0,0.08744000395139058
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,fp8,0,0.2038080096244812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,fp8,0,0.04582933088143667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,float16,0,0.05418666700522105
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,fp8,0,0.08475200335184734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,fp8,0,0.052154665191968284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,float16,0,0.05458133419354757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,fp8,0,0.05423999826113383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,fp8,0,0.3513813416163127
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,float16,0,0.03155199935038885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,fp8,0,0.05566399792830149
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,fp8,0,0.029109333952267964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,float16,0,0.05359466870625814
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,fp8,0,0.10967466235160828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,float16,0,0.03417066733042399
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,fp8,0,0.03389866650104523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,float16,0,0.0351946676770846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,fp8,0,0.03356266766786575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,float16,0,0.02665599932273229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,float16,0,0.0953546663125356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,fp8,0,0.029535998900731403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,float16,0,0.01947733387351036
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,fp8,0,0.019098666807015736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,float16,0,0.021136000752449036
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,float16,0,0.021173333128293354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,fp8,0,0.02123733361562093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,fp8,0,0.03369066615899404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,float16,0,0.031514666974544525
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,float16,0,0.04798933366934458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,float16,0,0.19665600856145224
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,float16,0,0.8346880276997884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,fp8,0,0.021386665602525074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,float16,0,0.8735679785410563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,fp8,0,0.8458613554636637
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,float16,0,1.1871519883473713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,float16,0,0.019280000279347103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,fp8,0,1.2167680263519287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,float16,0,1.25218669573466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,fp8,0,0.7680799961090088
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,fp8,0,1.2311519781748455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,float16,0,0.41539732615152997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,fp8,0,0.39460798104604083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,fp8,0,0.43723734219868976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,float16,0,0.5970293283462524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,fp8,0,0.5722613334655762
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,float16,0,0.5881760120391846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,float16,0,0.21751999855041504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,float16,0,0.5880000193913778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,fp8,0,0.2068693240483602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,float16,0,0.45881064732869464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,fp8,0,1.3963039716084797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,float16,0,0.2348746657371521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,float16,0,0.3201120098431905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,fp8,0,0.32896532615025836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,float16,0,0.3017226656277974
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,fp8,0,0.6426506837209066
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,fp8,0,0.30210665861765545
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,float16,0,0.11826666196187337
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,fp8,0,0.12050132950146993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,fp8,0,0.36077332496643066
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,float16,0,1.1566399733225505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,float16,0,0.1635040044784546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,fp8,0,0.16638933618863425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,fp8,0,0.23278399308522543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,float16,0,0.1771893302599589
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,fp8,0,0.17207467555999756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,float16,0,0.06656000018119812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,float16,0,0.07261866827805837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,float16,0,0.15824000040690103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,float16,0,0.30037333567937213
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,fp8,0,0.06273066500822704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,fp8,0,0.7072906494140625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,float16,0,0.09288000067075093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,float16,0,0.08774399757385254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,fp8,0,0.08505599697430928
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,float16,0,0.0383146678407987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,fp8,0,0.03694933404525121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,float16,0,0.08754133184750874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,fp8,0,0.1881493330001831
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,float16,0,0.041264000038305916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,fp8,0,0.11146666606267293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,float16,0,0.045797333121299744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,float16,0,0.04636266827583313
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,fp8,0,0.08839999636014302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,float16,0,0.04576533536116282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,fp8,0,0.10178666313489278
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,fp8,0,0.060133333007494606
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,fp8,0,0.04987200101216634
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,float16,0,0.12638399998346964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,float16,0,0.02517866591612498
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,fp8,0,0.03868266691764196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,fp8,0,0.04587733248869578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,fp8,0,0.02739733209212621
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,float16,0,0.027402666707833607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,fp8,0,0.028181334336598713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,float16,0,0.016415999581416447
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,fp8,0,0.015226667126019796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,fp8,0,0.031504000226656594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,float16,0,0.015856000284353893
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,fp8,0,0.017711999515692394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,float16,0,0.017103999853134155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,float16,0,0.023520000278949738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,float16,0,0.01883200059334437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,fp8,0,0.019440000255902607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,float16,0,0.023333333432674408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,fp8,0,0.023247999449570973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,float16,0,0.015130666395028433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,fp8,0,0.046800002455711365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,fp8,0,0.015306666493415833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,fp8,0,0.015205333630243937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,float16,0,0.015098666151364645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,float16,0,0.01926933353145917
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,fp8,0,0.015439999600251516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,float16,0,0.015173333386580149
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,fp8,0,0.01887999971707662
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,float16,0,0.02752000093460083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,float16,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,fp8,0,0.4853173494338989
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,float16,0,0.5335093339284261
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,fp8,0,0.5310773452123007
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,fp8,0,0.015520000209410986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,fp8,0,0.6579626798629761
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,fp8,0,0.023978665471076965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,float16,0,0.6781653563181559
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,float16,0,0.4978346824645996
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,float16,0,0.2600160042444865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,float16,0,0.6428639888763428
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,fp8,0,0.2516746719678243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,fp8,0,0.7521066665649414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,float16,0,0.2778453429539998
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,float16,0,0.6950879891713461
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,fp8,0,0.34720532099405926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,float16,0,0.3443946838378906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,fp8,0,0.7187146345774332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,fp8,0,0.3495573202768962
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,float16,0,0.1397760013739268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,fp8,0,0.3829066753387451
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,fp8,0,0.13591466347376505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,float16,0,0.1853333314259847
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,fp8,0,0.1844480037689209
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,float16,0,0.18946133057276407
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,float16,0,0.3194826642672221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,float16,0,0.14864533146222433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,fp8,0,0.2731039921442668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,float16,0,0.16590933005015054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,fp8,0,0.07266133526961009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,float16,0,0.3689546585083008
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,fp8,0,0.19960000117619833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,fp8,0,0.07854400078455608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,fp8,0,0.19108800093332926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,float16,0,0.09833066662152608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,float16,0,0.10108266274134318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,fp8,0,0.09841066598892212
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,float16,0,0.041696002086003624
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,fp8,0,0.039877332746982574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,float16,0,0.07730666796366374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,float16,0,0.04196266829967499
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,fp8,0,0.02236266682545344
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,float16,0,0.04825599988301595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,fp8,0,0.049957334995269775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,float16,0,0.04993066688378652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,fp8,0,0.04993600149949392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,fp8,0,0.05264000097910563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,float16,0,0.049957334995269775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,float16,0,0.026901334524154663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,fp8,0,0.02553066611289978
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,float16,0,0.027488000690937042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,float16,0,0.08402666449546814
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,fp8,0,0.027258666853109997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,float16,0,0.02945599953333537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,fp8,0,0.10233066479365031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,float16,0,0.030181333422660828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,fp8,0,0.03156266609827677
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,float16,0,0.023669332265853882
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,float16,0,0.08870933453241985
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,fp8,0,0.1439466675122579
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,fp8,0,0.041706666350364685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,fp8,0,0.01913600042462349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,fp8,0,0.03146133323510488
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,float16,0,0.01918399954835574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,float16,0,0.01725333308180173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,fp8,0,0.10517332951227824
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,float16,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,float16,0,0.017397332936525345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,fp8,0,0.030042665700117748
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,float16,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,fp8,0,0.01309866706530253
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,fp8,0,0.01915733392039935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,float16,0,0.013151999562978745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,float16,0,0.014858666807413101
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,float16,0,0.013082666943470636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,float16,0,0.013157332936922709
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,float16,0,0.013210666676362356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,float16,0,0.01309866706530253
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,fp8,0,0.013178666432698568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,float16,0,0.013077333569526672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,fp8,0,0.017184000462293625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,float16,0,0.01931200052301089
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,fp8,0,0.40057067076365155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,fp8,0,0.4201706647872925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,fp8,0,0.5157653490702311
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,float16,0,0.49874667326609295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,float16,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,float16,0,0.5169386863708496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,fp8,0,0.4982080062230428
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,float16,0,0.44092798233032227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,fp8,0,0.46036799748738605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,float16,0,0.23002133766810098
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,fp8,0,0.22219733397165933
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,float16,0,0.26897066831588745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,float16,0,0.26266666253407794
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,float16,0,0.21966399749120077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,fp8,0,0.21065600713094076
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,float16,0,0.11794132987658183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,fp8,0,0.2360373338063558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,float16,0,0.22714134057362875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,fp8,0,0.1123466690381368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,float16,0,0.1218826671441396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,float16,0,0.41602667172749835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,fp8,0,0.1362986663977305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,float16,0,0.14245333274205527
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,float16,0,0.1398293375968933
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,fp8,0,0.2717653314272563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,float16,0,0.06356800099213918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,fp8,0,0.12396799524625142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,float16,0,0.06460266808668773
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,fp8,0,0.06002666552861532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,float16,0,0.06964266796906789
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,fp8,0,0.06900266806284587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,float16,0,0.0740479975938797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,fp8,0,0.0699893335501353
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,fp8,0,0.11652800440788269
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,float16,0,0.03628266602754593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,fp8,0,0.14033066232999167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,fp8,0,0.033600000043710075
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,fp8,0,0.06039999922116598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,float16,0,0.03765333443880081
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,float16,0,0.039647998909155525
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,fp8,0,0.06198399762312571
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,float16,0,0.0395359992980957
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,fp8,0,0.03985599925120672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,fp8,0,0.26181866725285846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,float16,0,0.023423999547958374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,float16,0,0.029504001140594482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,fp8,0,0.03766933331886927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,float16,0,0.06623466809590657
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,float16,0,0.023290666441122692
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,fp8,0,0.03930133332808813
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,fp8,0,0.025413334369659424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,fp8,0,0.02518400053183238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,float16,0,0.4283253351847331
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,float16,0,0.019152000546455383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,fp8,0,0.023205332458019257
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,float16,0,0.017231999586025875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,float16,0,0.11922666430473328
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,fp8,0,0.023317334552605946
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,fp8,0,0.023221333821614582
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,fp8,0,0.015978666643301647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,float16,0,0.016058667252461117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,fp8,0,0.016938666502634685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,fp8,0,0.035717333356539406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,float16,0,0.015146666516860327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,float16,0,0.025455998877684276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,float16,0,0.013002666334311167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,fp8,0,0.015696000307798386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,float16,0,0.012986666212479273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,float16,0,0.016842667013406754
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,float16,0,0.016208000481128693
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,float16,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,float16,0,0.025616000096003216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,fp8,0,0.01240533341964086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,float16,0,0.012975999464591345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,float16,0,0.012736000120639801
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,float16,0,0.013093333691358566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,float16,0,0.013221333424250284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,fp8,0,0.3551573355992635
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,float16,0,0.3744479815165202
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,float16,0,0.40778132279713947
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,fp8,0,0.017322666943073273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,float16,0,0.012373333175977072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,float16,0,0.4058506488800049
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,fp8,0,0.3676266670227051
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,fp8,0,0.4034506479899089
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,fp8,0,0.3099466760953267
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,float16,0,0.3012266755104065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,float16,0,0.012586666891972223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,fp8,0,0.18429332971572876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,fp8,0,0.4071253140767415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,float16,0,0.1991200049718221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,fp8,0,0.19365866978963217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,fp8,0,0.21092265844345093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,fp8,0,0.21101333697636923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,float16,0,0.19498133659362793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,fp8,0,0.01720533271630605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,float16,0,0.10379733641942342
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,fp8,0,0.0974133312702179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,float16,0,0.10461866855621338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,float16,0,0.21441600720087686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,float16,0,0.21170133352279663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,fp8,0,0.09896533687909444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,fp8,0,0.10724266370137532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,float16,0,0.36657599608103436
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,float16,0,0.05622933308283488
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,float16,0,0.0876693328221639
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,fp8,0,0.052144000927607216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,fp8,0,0.08442133665084839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,fp8,0,0.05422399938106537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,float16,0,0.06154666841030121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,float16,0,0.11572800079981486
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,fp8,0,0.057631999254226685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,float16,0,0.06029333174228668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,float16,0,0.15836800138155618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,float16,0,0.0335359995563825
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,fp8,0,0.1674399971961975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,fp8,0,0.031354665756225586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,float16,0,0.04186666508515676
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,float16,0,0.11314133803049724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,float16,0,0.035802667339642845
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,float16,0,0.03356266766786575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,fp8,0,0.03349866718053818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,fp8,0,0.10733333230018616
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,fp8,0,0.03349333256483078
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,fp8,0,0.05816533168156942
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,fp8,0,0.021333334346612293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,fp8,0,0.027994667490323383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,float16,0,0.021930667261282604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,fp8,0,0.0458133320013682
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,float16,0,0.022976001103719074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,fp8,0,0.03136000037193298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,fp8,0,0.021498667697111767
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,float16,0,0.03568533311287562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,float16,0,0.022330666581789654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,float16,0,0.01533866673707962
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,float16,0,0.02536533276240031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,fp8,0,0.015130666395028433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,fp8,0,0.015109332899252573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,float16,0,0.015184000134468079
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,fp8,0,0.02124800036350886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,fp8,0,0.015247999380032221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,float16,0,0.015114666273196539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,float16,0,0.022789334257443745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,fp8,0,0.014970666418472925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,float16,0,0.011648000528415045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,fp8,0,0.01724799970785777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,fp8,0,0.012624000509579977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,float16,0,0.011242666592200598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,float16,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,float16,0,0.017130666722853977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,fp8,0,0.019248000035683315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,float16,0,0.012997332960367203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,float16,0,0.011114666859308878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,float16,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,float16,0,0.011600000162919363
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,fp8,0,0.021301334102948506
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,float16,0,0.011173332730929056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,float16,0,0.014298666268587112
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,float16,0,0.3619146744410197
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,fp8,0,0.33275200923283893
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,float16,0,0.015344000111023584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,float16,0,0.058506667613983154
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,float16,0,0.3657386700312297
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,fp8,0,0.34025601545969647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,fp8,0,0.3582506577173869
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,fp8,0,0.3569440046946208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,float16,0,0.2412853240966797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,fp8,0,0.23783467213312784
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,float16,0,0.012058666596810022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,float16,0,0.18757865826288858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,float16,0,0.37718399365743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,fp8,0,0.18163732687632242
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,float16,0,0.3733813365300496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,float16,0,0.19049066305160522
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,fp8,0,0.18638400236765543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,fp8,0,0.1721973419189453
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,float16,0,0.12850133577982584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,fp8,0,0.12559466560681662
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,float16,0,0.10178666313489278
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,fp8,0,0.0922986666361491
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,fp8,0,0.09654933214187622
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,fp8,0,0.17482666174570718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,float16,0,0.1967573364575704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,float16,0,0.10114133358001709
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,fp8,0,0.09629866480827332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,fp8,0,0.06434666613737743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,float16,0,0.06840533514817555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,float16,0,0.10130133231480916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,fp8,0,0.05002133548259735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,float16,0,0.05640000104904175
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,float16,0,0.05613866448402405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,fp8,0,0.05204799771308899
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,float16,0,0.10478933652242024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,float16,0,0.0576800008614858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,fp8,0,0.052111998200416565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,float16,0,0.033786666889985405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,float16,0,0.05669333537419637
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,float16,0,0.03549866626660029
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,fp8,0,0.09086400270462036
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,float16,0,0.03436800092458725
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,fp8,0,0.030421334008375805
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,float16,0,0.033002667129039764
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,float16,0,0.20182400941848755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,fp8,0,0.03691199918588003
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,float16,0,0.023311999936898548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,float16,0,0.021205333371957142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,float16,0,0.021253332495689392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,fp8,0,0.021216000119845074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,fp8,0,0.021253332495689392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,float16,0,0.021216000119845074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,fp8,0,0.021354667842388153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,fp8,0,0.021327999730904896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,fp8,0,0.025333332518736523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,fp8,0,0.031231999397277832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,float16,0,0.017269333203633625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,float16,0,0.015461333096027374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,float16,0,0.03364266703526179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,float16,0,0.015173333386580149
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,fp8,0,0.01951466624935468
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,fp8,0,0.031354665756225586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,float16,0,0.015194666882356008
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,float16,0,0.015237333873907724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,fp8,0,0.05053866902987162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,float16,0,0.011418666690587997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,fp8,0,0.015301333119471868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,float16,0,0.011050666371981302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,float16,0,0.02149333308140437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,float16,0,0.011834666132926941
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,float16,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,fp8,0,0.016415999581416447
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,fp8,0,0.01201066623131434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,fp8,0,0.015477333217859268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,float16,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,float16,0,0.015210667004187902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,float16,0,0.011045332998037338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,1,128,1,float16,float16,0,0.015173333386580149
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,float16,0,0.013306666165590286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,fp8,0,0.01126933346192042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,2,128,1,float16,fp8,0,0.019391999890406925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,float16,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,0,0.024325333535671234
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,0,0.020517333100239437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,0,0.029285334050655365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,1,128,1,float16,float16,0,0.01089599976936976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,1,128,1,float16,fp8,0,0.011157333850860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,fp8,0,0.08180266618728638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,2,128,1,float16,float16,0,0.015194666882356008
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,fp8,0,0.031114667654037476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,0,0.017136000096797943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,0,0.015189333508412043
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,0,0.019258666783571243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,1,128,1,float16,float16,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,fp8,0,0.04586133360862732
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,2,128,1,float16,float16,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,2,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,float16,0,0.10658133029937744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,0,0.012634667257467905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,2,128,1,float16,float16,0,0.02123733361562093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,1,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,float16,0,0.031845333675543465
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,1,128,1,float16,float16,0,0.01089599976936976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,2,128,1,float16,fp8,0,0.0143306665122509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,2,128,1,float16,float16,0,0.01209066684047381
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,2,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,0,0.01522133375207583
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,0,0.010933333386977514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,float16,0,0.060405333836873375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,fp8,0,0.029535998900731403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,float16,0,0.021253332495689392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,1,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,2,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,0,0.010842667271693548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,0,0.025968000292778015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,0,0.010496000448862711
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,0,0.009205333267649015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,1,128,1,float16,float16,0,0.010960000256697336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,1,128,1,float16,float16,0,0.010901333143313726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,1,128,1,float16,fp8,0,0.011050666371981302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,2,128,1,float16,float16,0,0.010832000523805618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,2,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,0,0.009466666728258133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,0,0.009162666896979014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,0,0.009130666653315226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,0,0.00956266683836778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,1,128,1,float16,float16,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,1,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,2,128,1,float16,float16,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,2,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,0,0.009008000294367472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,0,0.00891733355820179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,1,128,1,float16,float16,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,1,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,float16,0,0.011322667201360067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,2,128,1,float16,float16,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,0,0.009237333511312803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,float16,0,0.01312000056107839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,0,0.008912000184257826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,1,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,float16,0,0.01591466615597407
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,1,128,1,float16,float16,0,0.01098666712641716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,1,128,1,float16,fp8,0,0.011157333850860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,2,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,2,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,0,0.009050666665037474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,0,0.009722666814923286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,fp8,0,0.013327999661366144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,0,0.009194666519761086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,2,128,1,float16,float16,0,0.010928000013033548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,float16,0,0.01192533348997434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,2,128,1,float16,float16,0,0.010933333386977514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,fp8,0,17.078768412272137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,fp8,0,17.242127736409504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,float16,0,23.353050231933594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,float16,0,23.017534891764324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,fp8,0,17.45147196451823
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,float16,0,23.748036702473957
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,float16,0,12.66326904296875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,fp8,0,8.539056142171225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,fp8,0,17.66742451985677
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,fp8,0,8.667610804239908
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,float16,0,11.8582394917806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,fp8,0,9.833632151285807
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,float16,0,11.982271830240885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,fp8,0,8.81607437133789
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,float16,0,11.8939577738444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,float16,0,5.384554545084636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,fp8,0,4.986213366190593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,float16,0,11.822063446044922
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,float16,0,23.87036387125651
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,float16,0,5.688208262125651
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,fp8,0,4.698474566141765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,fp8,0,4.546527862548828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,float16,0,5.608608245849609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,float16,0,5.994192123413086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,fp8,0,4.2622880935668945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,float16,0,2.7395359675089517
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,fp8,0,4.634853363037109
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,fp8,0,2.319119930267334
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,fp8,0,2.5694185892740884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,fp8,0,2.2581812540690103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,float16,0,2.7550665537516275
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,fp8,0,2.2595574061075845
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,float16,0,2.7547839482625327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,fp8,0,2.271695931752523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,float16,0,3.0427627563476562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,float16,0,2.734256108601888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,fp8,0,8.876714706420898
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,fp8,0,9.832015991210938
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,float16,0,13.329760233561197
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,float16,0,13.435690561930338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,fp8,0,10.122698465983072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,float16,0,13.852347056070963
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,fp8,0,9.991029103597006
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,float16,0,14.111216227213541
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,fp8,0,10.346319834391275
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,float16,0,6.5373280843098955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,float16,0,6.141578674316406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,fp8,0,4.962640126546224
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,float16,0,6.403146743774414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,fp8,0,4.911424001057942
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,float16,0,6.696298599243164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,float16,0,6.957258860270183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,float16,0,6.835376103719075
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,float16,0,3.4926878611246743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,float16,0,3.017754554748535
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,fp8,0,3.2139625549316406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,fp8,0,5.066143989562988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,fp8,0,6.086698532104492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,fp8,0,2.5793174107869468
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,float16,0,3.2383572260538735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,fp8,0,2.615978717803955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,fp8,0,2.6308266321818032
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,float16,0,3.152378718058268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,fp8,0,2.889765421549479
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,fp8,0,1.68067200978597
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,float16,0,1.5426987012227376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,fp8,0,1.2759146690368652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,float16,0,1.5738879839579265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,fp8,0,1.2951893011728923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,float16,0,1.6564480463663738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,fp8,0,1.366938591003418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,float16,0,1.6234453519185383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,fp8,0,5.254469235738118
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,float16,0,1.7733546892801921
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,fp8,0,6.818623860677083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,float16,0,3.0605332056681314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,float16,0,9.443632125854492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,fp8,0,1.4182613690694172
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,float16,0,9.528895696004232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,float16,0,9.424154917399088
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,float16,0,4.461013476053874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,fp8,0,4.427610715230306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,fp8,0,6.979546864827474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,float16,0,5.14299742380778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,fp8,0,3.737231890360514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,float16,0,4.426597277323405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,fp8,0,3.4750188191731772
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,fp8,0,7.239461263020833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,float16,0,4.528965314229329
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,float16,0,4.562032063802083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,fp8,0,7.299039840698242
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,float16,0,2.1467572848002114
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,fp8,0,2.2623626391092935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,fp8,0,2.15992005666097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,float16,0,2.171818733215332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,fp8,0,1.8909385999043782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,float16,0,2.2604853312174478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,fp8,0,3.7514400482177734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,fp8,0,1.9142667452494304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,float16,0,2.269381364186605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,float16,0,9.671248118082682
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,fp8,0,1.1765013535817463
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,float16,0,1.1068586508433025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,float16,0,1.2918773492177327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,float16,0,1.140565315882365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,fp8,0,0.9356906414031982
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,fp8,0,1.0010933081309001
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,fp8,0,1.9515093167622883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,fp8,0,3.880154609680176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,fp8,0,0.9732267061869303
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,float16,0,1.1650880177815754
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,fp8,0,0.9935999711354574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,fp8,0,9.114816029866537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,float16,0,2.5081067085266113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,fp8,0,9.273183822631836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,float16,0,1.186079978942871
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,float16,0,12.500155131022135
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,fp8,0,9.952720006306967
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,float16,0,12.683391571044922
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,float16,0,13.03805923461914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,fp8,0,9.685173034667969
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,float16,0,5.822906494140625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,fp8,0,6.069616317749023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,float16,0,6.895392100016276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,float16,0,5.831610361735026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,fp8,0,4.750624020894368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,float16,0,5.897754669189453
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,float16,0,6.242677052815755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,float16,0,12.70745595296224
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,fp8,0,5.023210525512695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,float16,0,3.3960692087809243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,fp8,0,3.0868053436279297
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,float16,0,2.7971413930257163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,fp8,0,2.7667519251505532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,float16,0,2.866367975870768
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,fp8,0,2.3174452781677246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,fp8,0,2.4712799390157065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,fp8,0,4.950746536254883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,float16,0,2.940410614013672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,fp8,0,2.5487573941548667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,fp8,0,4.472592035929362
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,fp8,0,1.5771360397338867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,fp8,0,1.2011840343475342
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,float16,0,1.5546026229858398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,fp8,0,1.2993866602579753
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,float16,0,2.959477424621582
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,fp8,0,1.2885973453521729
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,fp8,0,1.1750986576080322
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,float16,0,1.7422186533610027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,fp8,0,0.8259680271148682
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,float16,0,0.7333333492279053
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,fp8,0,0.6244853337605795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,float16,0,1.5355733235677083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,float16,0,0.7902506987253824
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,float16,0,0.7940959930419922
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,float16,0,1.4165867169698079
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,float16,0,1.4441439310709636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,fp8,0,0.6850506464640299
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,fp8,0,0.6353013515472412
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,fp8,0,0.6840213139851888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,fp8,0,5.2417707443237305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,float16,0,7.049791971842448
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,float16,0,0.8821173508961996
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,float16,0,7.036362965901692
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,float16,0,0.7500159740447998
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,float16,0,7.184218724568685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,fp8,0,5.786703745524089
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,fp8,0,5.941647847493489
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,float16,0,7.530261357625325
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,fp8,0,3.9536587397257485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,fp8,0,5.407237370808919
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,fp8,0,2.7200746536254883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,fp8,0,2.949455897013346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,float16,0,3.611450513203939
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,float16,0,4.1019894282023115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,float16,0,2.0972426732381186
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,float16,0,1.631989320119222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,fp8,0,1.9905333518981934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,float16,0,3.52732785542806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,float16,0,1.647098700205485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,fp8,0,1.3930080731709797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,fp8,0,3.0055999755859375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,fp8,0,1.5320053100585938
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,float16,0,1.8015413284301758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,fp8,0,1.5362292925516765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,float16,0,0.8343893686930338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,fp8,0,1.3595946629842122
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,fp8,0,1.0112000306447346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,fp8,0,0.7084960142771403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,float16,0,0.864250659942627
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,fp8,0,0.7240479787190756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,float16,0,0.9070453643798828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,fp8,0,0.7890613079071045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,float16,0,0.9278133710225424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,fp8,0,0.79202667872111
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,float16,0,1.0537226994832356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,fp8,0,0.5361813306808472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,fp8,0,0.38861334323883057
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,float16,0,3.2289759318033853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,fp8,0,0.3959626754124959
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,float16,0,0.4777066707611084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,fp8,0,0.43034664789835614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,float16,0,0.47499199708302814
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,fp8,0,0.42663466930389404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,float16,0,0.5684213240941366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,float16,0,0.45797332127888996
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,fp8,0,2.80570125579834
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,float16,0,6.546298980712891
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,float16,0,0.44413332144419354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,fp8,0,5.229093233744304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,float16,0,3.352293332417806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,float16,0,7.078890482584636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,fp8,0,5.767189025878906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,fp8,0,5.054501215616862
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,float16,0,6.636901219685872
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,float16,0,7.292245229085286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,fp8,0,5.832799911499023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,fp8,0,2.5533013343811035
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,float16,0,1.766032059987386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,float16,0,3.1628907521565757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,fp8,0,2.8830931981404624
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,float16,0,3.5231145222981772
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,fp8,0,2.9065545399983725
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,float16,0,3.4395360946655273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,fp8,0,3.264992078145345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,float16,0,4.29146671295166
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,float16,0,3.0749120712280273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,float16,0,1.6003200213114421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,fp8,0,2.14737606048584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,fp8,0,1.3413653373718262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,float16,0,1.714362621307373
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,float16,0,1.769045352935791
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,fp8,0,1.5478240648905437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,float16,0,1.0785173575083415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,float16,0,2.116960048675537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,fp8,0,4.220608075459798
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,fp8,0,1.0723840395609539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,fp8,0,0.6712693373362223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,float16,0,0.893333355585734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,fp8,0,0.7994826634724935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,float16,0,0.8858880201975504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,fp8,0,0.7738986810048422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,float16,0,1.5336853663126628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,float16,0,0.7964853445688883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,float16,0,0.57859734694163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,float16,0,0.8023946285247803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,fp8,0,0.3616960048675537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,fp8,0,0.6999093691507975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,float16,0,0.4160746733347575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,fp8,0,0.3741919994354248
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,float16,0,0.4522560040156047
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,fp8,0,0.4195893208185832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,float16,0,0.466869314511617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,fp8,0,0.41946665445963544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,fp8,0,1.3897973696390789
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,float16,0,0.32316799958546955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,fp8,0,0.20225600401560465
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,fp8,0,0.3052053252855937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,fp8,0,0.20797866582870483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,float16,0,0.23349867264429727
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,fp8,0,0.2242506742477417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,float16,0,0.24964267015457153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,fp8,0,1.477669397989909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,fp8,0,0.22504534324010214
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,float16,0,0.22900799910227457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,float16,0,0.4077599843343099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,float16,0,3.722720146179199
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,fp8,0,3.076202710469564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,fp8,0,0.5586080153783163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,float16,0,0.2511413296063741
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,fp8,0,3.1953226725260415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,float16,0,4.169557253519694
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,float16,0,1.8642452557881672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,float16,0,4.143674532572429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,float16,0,2.707882563273112
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,fp8,0,1.6163573265075684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,float16,0,1.9098025957743328
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,float16,0,3.8124958674112954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,fp8,0,1.70469331741333
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,float16,0,2.127290725708008
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,fp8,0,1.817855993906657
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,fp8,0,3.5919361114501953
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,float16,0,0.9135839939117432
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,float16,0,1.3612106641133626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,fp8,0,0.8032960096995035
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,fp8,0,1.3971254030863445
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,float16,0,0.940608024597168
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,fp8,0,0.8307147026062012
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,fp8,0,0.9631360371907552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,float16,0,1.0393973191579182
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,fp8,0,0.993834654490153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,fp8,0,1.893232027689616
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,float16,0,0.47463464736938477
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,fp8,0,0.7158772945404053
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,fp8,0,3.6632960637410483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,fp8,0,2.7565011978149414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,float16,0,0.543445348739624
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,fp8,0,0.5071093241373698
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,float16,0,1.058458646138509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,fp8,0,0.49584531784057617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,float16,0,2.1324480374654136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,float16,0,0.3849173386891683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,fp8,0,0.3801279862721761
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,fp8,0,0.4188426733016968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,float16,0,0.2569653391838074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,float16,0,0.4858666658401489
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,fp8,0,0.2327359914779663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,fp8,0,0.4404533306757609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,fp8,0,0.2711679935455322
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,float16,0,0.2864053249359131
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,float16,0,0.5420533418655396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,float16,0,0.1456160048643748
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,fp8,0,0.20965866247812906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,fp8,0,0.1318719983100891
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,float16,0,0.7029759883880615
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,float16,0,0.14855466286341348
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,float16,0,0.15777599811553955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,fp8,0,0.23749866088231406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,float16,0,0.2879306674003601
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,fp8,0,0.1495199998219808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,fp8,0,0.27171732981999713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,float16,0,0.20218666394551596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,fp8,0,0.1346773306528727
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,float16,0,3.6642773946126304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,fp8,0,0.14683199922243753
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,float16,0,3.7577813466389975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,float16,0,0.15872533122698465
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,fp8,0,3.2901760737101235
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,fp8,0,3.866645177205404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,float16,0,4.284880002339681
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,fp8,0,3.114368120829264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,fp8,0,4.035749435424805
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,float16,0,0.26587732632954914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,fp8,0,3.1618239084879556
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,float16,0,1.8566239674886067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,fp8,0,1.6566133499145508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,fp8,0,1.6541333198547363
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,float16,0,2.223973274230957
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,fp8,0,1.9381599426269531
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,float16,0,4.35642147064209
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,fp8,0,1.9902079900105794
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,float16,0,2.1303787231445312
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,float16,0,0.9020266532897949
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,fp8,0,1.605072021484375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,float16,0,1.482437292734782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,fp8,0,0.8421493371327718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,float16,0,1.0749013423919678
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,fp8,0,0.9787200291951498
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,float16,0,1.0701546669006348
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,fp8,0,1.0118613243103027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,float16,0,0.4592906634012858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,float16,0,2.9740587870279946
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,float16,0,1.835301399230957
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,fp8,0,0.8003573417663574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,float16,0,0.49305065472920734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,float16,0,0.559279998143514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,fp8,0,0.5449866851170858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,float16,0,0.5602186520894369
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,fp8,0,0.5304373502731323
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,fp8,0,0.4166986544926961
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,fp8,0,0.4209973414738973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,float16,0,0.9387146631876627
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,fp8,0,0.4379146496454875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,fp8,0,0.238864004611969
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,float16,0,0.2916266719500224
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,fp8,0,0.27748266855875653
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,fp8,0,0.27928000688552856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,float16,0,0.7555519739786783
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,float16,0,0.23253333568572998
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,float16,0,0.25513599316279095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,fp8,0,0.2277173399925232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,float16,0,0.14063466588656107
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,float16,0,0.1439520021279653
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,fp8,0,0.13105066617329916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,float16,0,0.16249600052833557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,fp8,0,0.14647466937700906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,float16,0,0.29808000723520917
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,float16,0,0.16287466883659363
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,float16,0,0.43805332978566486
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,fp8,0,0.22618667284647623
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,fp8,0,0.07704000174999237
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,float16,0,0.08457066615422566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,fp8,0,0.12598933776219687
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,fp8,0,0.07914133369922638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,float16,0,0.09107733766237895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,fp8,0,0.08627200126647949
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,float16,0,0.09107733766237895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,fp8,0,0.14943466583887735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,fp8,0,0.08687466382980347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,float16,0,0.12481066584587097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,float16,0,2.246250629425049
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,fp8,0,1.979680061340332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,float16,0,2.299503962198893
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,fp8,0,2.1058613459269204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,fp8,0,0.8120799859364828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,fp8,0,2.4800373713175454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,fp8,0,0.1260373294353485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,float16,0,2.7069921493530273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,fp8,0,2.555786609649658
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,float16,0,1.1043840249379475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,fp8,0,1.0109439690907795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,float16,0,1.983850638071696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,fp8,0,2.191472053527832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,float16,0,1.1803253491719563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,fp8,0,1.0627893606821697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,float16,0,1.3960906664530437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,float16,0,0.24477332830429077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,float16,0,2.8335787455240884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,float16,0,0.08338133494059245
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,fp8,0,1.3727307319641113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,float16,0,0.5658560196558634
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,fp8,0,0.5183306535085043
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,float16,0,0.6030240058898926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,float16,0,0.7017599741617838
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,fp8,0,0.663424015045166
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,float16,0,1.4122400283813477
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,fp8,0,0.5456053415934244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,float16,0,0.5153226852416992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,fp8,0,0.27619733413060504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,float16,0,0.30852266152699787
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,fp8,0,1.313541332880656
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,fp8,0,0.2884213328361511
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,float16,0,0.3604053258895874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,fp8,0,0.351151982943217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,float16,0,0.9991626739501953
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,fp8,0,0.5675413211186727
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,fp8,0,0.3525173266728719
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,float16,0,0.16205333669980368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,float16,0,0.3016586701075236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,fp8,0,0.15077867110570273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,fp8,0,0.16077333688735962
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,float16,0,0.19593065977096558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,fp8,0,1.109445333480835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,fp8,0,0.18907199303309122
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,float16,0,0.29176000754038495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,float16,0,0.3604000012079875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,fp8,0,0.2984000047047933
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,float16,0,0.14949333667755127
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,float16,0,0.1763413349787394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,float16,0,0.09285333752632141
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,float16,0,0.09694400429725647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,fp8,0,0.0897653301556905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,fp8,0,0.10048000017801921
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,float16,0,0.19927465915679932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,float16,0,0.1079306701819102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,fp8,0,0.16300800442695618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,fp8,0,0.08683733145395915
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,float16,0,0.09530666470527649
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,float16,0,0.699951966603597
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,fp8,0,0.055029332637786865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,fp8,0,0.1925013264020284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,fp8,0,0.0860746701558431
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,float16,0,0.06031466523806254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,fp8,0,0.05667200187842051
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,float16,0,0.06355200211207072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,fp8,0,0.10188266634941101
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,fp8,0,0.06215466558933258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,fp8,0,0.6686240037282308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,float16,0,2.300410588582357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,fp8,0,2.1395947138468423
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,fp8,0,0.06178666651248932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,float16,0,2.4749600092569985
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,fp8,0,2.321872075398763
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,float16,0,0.10454400380452473
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,fp8,0,2.8917439778645835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,float16,0,0.06379200021425883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,fp8,0,2.9040587743123374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,float16,0,3.013183911641439
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,float16,0,2.325178623199463
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,float16,0,1.196133295694987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,float16,0,0.058575997749964394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,float16,0,1.2489333152770996
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,fp8,0,1.163856029510498
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,float16,0,1.60426664352417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,fp8,0,2.659898598988851
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,fp8,0,1.4647199312845867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,float16,0,1.1678933302561443
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,float16,0,3.1210079193115234
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,fp8,0,1.3407146135965984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,float16,0,0.6257546742757162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,float16,0,0.7898666858673096
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,fp8,0,1.5720693270365398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,float16,0,0.8012906710306803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,fp8,0,0.7463573614756266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,fp8,0,1.0853493213653564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,fp8,0,0.5562400023142496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,float16,0,0.30899733304977417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,fp8,0,0.293887992699941
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,fp8,0,0.6805760065714518
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,float16,0,0.32261866331100464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,fp8,0,0.3126773238182068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,fp8,0,0.5991946856180826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,float16,0,1.569258689880371
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,fp8,0,0.39320000012715656
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,float16,0,0.39016000429789227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,float16,0,0.16727999846140543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,float16,0,0.5885973374048868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,float16,0,0.34242133299509686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,float16,0,0.17620267470677695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,float16,0,0.21688000361124674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,fp8,0,0.21710934241612753
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,fp8,0,0.7793707052866617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,float16,0,0.4164693355560303
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,fp8,0,0.38972798983256024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,fp8,0,0.2197706699371338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,float16,0,0.09410666426022847
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,fp8,0,0.35321064790089923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,fp8,0,0.08701866865158081
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,fp8,0,0.09139200051625569
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,fp8,0,0.17111466328303018
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,float16,0,0.11801066994667053
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,fp8,0,0.10757333040237427
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,float16,0,0.11567999919255574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,fp8,0,0.110042671362559
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,fp8,0,0.16005333264668783
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,fp8,0,0.1876586675643921
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,float16,0,0.056143999099731445
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,float16,0,0.10102933645248413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,float16,0,0.6140480041503906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,float16,0,0.0580213318268458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,float16,0,0.06422933439413707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,float16,0,0.21953066190083823
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,float16,0,0.06452799836794536
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,fp8,0,0.06427200138568878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,float16,0,0.10168533523877461
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,float16,0,0.16538666685422262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,float16,0,0.0396373321612676
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,fp8,0,0.05298133194446564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,fp8,0,0.03667200108369192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,fp8,0,0.03769599894682566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,fp8,0,0.041562666495641075
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,float16,0,0.043706665436426796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,fp8,0,0.04233600199222565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,fp8,0,0.10499733686447144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,float16,0,0.05292266607284546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,fp8,0,0.05678399900595347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,float16,0,0.039893334110577904
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,fp8,0,0.05504000186920166
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,fp8,0,0.06217599908510844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,fp8,0,1.650800069173177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,float16,0,1.8546719551086426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,float16,0,2.4913652737935386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,fp8,0,2.561941305796305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,float16,0,0.04212800165017446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,float16,0,2.4613919258117676
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,float16,0,2.037365277608236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,float16,0,0.8493333657582601
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,fp8,0,2.3938986460367837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,float16,0,0.926741361618042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,fp8,0,0.8374773661295573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,float16,0,1.2372372945149739
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,fp8,0,1.8114666938781738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,fp8,0,1.3236746788024902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,fp8,0,1.2862026691436768
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,float16,0,1.2229173183441162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,float16,0,1.6737066904703777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,fp8,0,0.4267786741256714
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,fp8,0,0.4675893386205037
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,float16,0,0.44490667184193927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,float16,0,1.0136000315348308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,float16,0,0.469215989112854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,fp8,0,0.6330933173497518
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,float16,0,0.626255989074707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,fp8,0,0.9148639837900797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,fp8,0,0.6189866860707601
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,fp8,0,0.6122133334477743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,float16,0,0.2540266712506612
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,fp8,0,2.5257387161254883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,fp8,0,0.24548266331354776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,float16,0,0.617845336596171
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,float16,0,0.33319467306137085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,float16,0,0.3235146601994832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,fp8,0,0.3482613166173299
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,float16,0,0.2683466672897339
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,float16,0,0.12802132964134216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,fp8,0,0.31649599472681683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,fp8,0,0.1252959966659546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,float16,0,0.1356000006198883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,fp8,0,0.13491732875506082
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,float16,0,0.1769226590792338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,fp8,0,0.1741066575050354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,float16,0,0.17865600188573202
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,fp8,0,0.33872000376383465
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,float16,0,0.07177066802978516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,fp8,0,0.18232532342274985
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,fp8,0,0.06820799907048543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,float16,0,0.5157386859258016
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,fp8,0,0.1683466633160909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,fp8,0,0.07274666428565979
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,float16,0,0.09471999605496724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,float16,0,0.0953493316968282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,fp8,0,0.09285333752632141
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,fp8,0,0.0909440020720164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,float16,0,0.08040533463160197
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,float16,0,0.042506664991378784
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,fp8,0,0.22533865769704184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,float16,0,0.14386666814486185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,float16,0,0.043925335009892784
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,float16,0,0.0498933345079422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,fp8,0,0.09044266740481059
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,float16,0,0.049925332268079124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,fp8,0,0.05202666421731313
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,float16,0,0.04164266586303711
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,fp8,0,0.04189866781234741
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,float16,0,0.07622933387756348
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,fp8,0,0.028165332973003387
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,fp8,0,0.029498666524887085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,float16,0,0.03345600018898646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,fp8,0,0.033589333295822144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,float16,0,0.03373866776625315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,fp8,0,0.048570667703946434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,float16,0,0.23799999554951987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,float16,0,0.029493334392706554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,fp8,0,0.04374399781227112
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,float16,0,0.02120000123977661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,fp8,0,0.03149333347876867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,float16,0,0.021322667598724365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,fp8,0,0.021162666380405426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,float16,0,0.022261333962281544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,fp8,0,0.022853332261244457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,float16,0,0.02327999969323476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,fp8,0,0.023285334308942158
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,fp8,0,0.03363733241955439
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,fp8,0,1.2076746622721355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,float16,0,0.025946666797002155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,fp8,0,0.6981279850006104
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,float16,0,0.7621119817097982
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,fp8,0,0.7823092937469482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,float16,0,1.1168800195058186
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,fp8,0,1.0367680390675862
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,float16,0,1.1154507001241047
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,fp8,0,0.050527999798456825
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,float16,0,0.6966453393300375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,fp8,0,1.0782826741536458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,fp8,0,0.359386682510376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,float16,0,0.3931093215942383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,float16,0,0.3592640161514282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,fp8,0,1.1361066500345867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,fp8,0,0.40515732765197754
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,float16,0,0.5461599826812744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,float16,0,0.030767999589443207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,fp8,0,0.5747359991073608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,float16,0,0.5604319969813029
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,fp8,0,0.5535093148549398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,float16,0,0.47196801503499347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,fp8,0,0.5773760080337524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,float16,0,0.20804266134897867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,float16,0,0.9256213506062826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,float16,0,0.2956053415934245
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,fp8,0,0.3087573250134786
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,fp8,0,0.2863786617914836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,float16,0,0.2905706763267517
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,fp8,0,0.021407999098300934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,float16,0,0.18890132506688437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,float16,0,0.24702399969100952
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,float16,0,0.10689600308736165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,fp8,0,0.29900266726811725
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,float16,0,0.11425066987673442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,fp8,0,0.11789333820343018
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,fp8,0,0.15770133336385092
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,float16,0,0.15745066603024802
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,fp8,0,0.1874879995981852
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,fp8,0,0.10576533277829488
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,float16,0,0.06017066538333893
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,float16,0,0.06514133512973785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,fp8,0,0.2082293430964152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,float16,0,0.08496532837549846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,fp8,0,0.07852266728878021
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,fp8,0,0.1644266645113627
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,float16,0,0.08682133754094441
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,fp8,0,0.0817493349313736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,float16,0,0.03518400092919668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,fp8,0,0.08481066425641377
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,float16,0,0.13401599725087485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,float16,0,0.035573333501815796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,fp8,0,0.036015999813874565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,float16,0,0.14851199587186178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,float16,0,0.04166933397452036
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,fp8,0,0.04384533564249674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,float16,0,0.04211199780305227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,fp8,0,0.04379733403523763
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,float16,0,0.025199999411900837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,fp8,0,0.025402667621771496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,float16,0,0.035775999228159584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,float16,0,0.07253866891066234
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,float16,0,0.02535466601451238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,fp8,0,0.025536000728607178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,fp8,0,0.05795200169086456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,float16,0,0.02935466667016347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,fp8,0,0.06246933341026306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,fp8,0,0.03091199944416682
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,float16,0,0.01718933383623759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,float16,0,0.02218666672706604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,float16,0,0.01711999997496605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,fp8,0,0.044138665000597634
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,fp8,0,0.019088000059127808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,float16,0,0.019088000059127808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,fp8,0,0.029311999678611755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,float16,0,0.01714133347074191
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,float16,0,0.01842133328318596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,float16,0,0.01695466662446658
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,fp8,0,0.027503999571005504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,float16,0,0.0170666662355264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,float16,0,0.01916266605257988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,fp8,0,0.1587999959786733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,float16,0,0.029178666571776073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,fp8,0,0.4265386660893758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,float16,0,0.4227786858876546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,float16,0,0.46639466285705566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,fp8,0,0.4721279939015706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,float16,0,0.01710933322707812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,float16,0,0.604634682337443
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,fp8,0,0.6420213381449381
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,float16,0,0.6078506708145142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,fp8,0,0.6151146491368612
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,float16,0,0.2229493260383606
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,fp8,0,0.021477334201335907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,float16,0,0.5020800034205118
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,fp8,0,0.6081813176472982
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,fp8,0,0.24092266956965128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,float16,0,0.3235093355178833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,fp8,0,0.0173333336909612
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,fp8,0,0.0335359995563825
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,float16,0,0.30511999130249023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,fp8,0,0.31945600112279254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,float16,0,0.12056533495585124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,fp8,0,0.31227733691533405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,fp8,0,0.11889066298802693
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,fp8,0,0.12844799955685934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,fp8,0,0.16799465815226236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,float16,0,0.17146132389704385
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,fp8,0,0.16827734311421713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,fp8,0,0.34510401884714764
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,float16,0,0.13769599795341492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,fp8,0,0.0642986645301183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,float16,0,0.07251733541488647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,float16,0,0.2395626703898112
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,float16,0,0.1698453426361084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,fp8,0,0.06836266815662384
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,fp8,0,0.0846613347530365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,float16,0,0.06725866595904033
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,fp8,0,0.08829333384831746
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,fp8,0,0.16416000326474509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,fp8,0,0.2214613358179728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,fp8,0,0.08874666690826416
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,float16,0,0.07619733115037282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,fp8,0,0.03884266565243403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,float16,0,0.03977066775163015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,fp8,0,0.040362666050593056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,float16,0,0.0458133320013682
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,float16,0,0.2590773304303487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,float16,0,0.046021332343419395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,fp8,0,0.049829334020614624
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,float16,0,0.039333333571751915
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,float16,0,0.03686933219432831
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,float16,0,0.02513066679239273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,float16,0,0.08880533774693807
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,fp8,0,0.025285333395004272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,fp8,0,0.04784533381462097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,float16,0,0.027434666951497395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,float16,0,0.1283626655737559
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,fp8,0,0.02934933453798294
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,fp8,0,0.02940800040960312
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,float16,0,0.09004799524943034
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,float16,0,0.02327466756105423
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,float16,0,0.023589332898457844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,fp8,0,0.018986667195955913
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,float16,0,0.017530667285124462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,float16,0,0.01951466624935468
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,fp8,0,0.02125866711139679
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,float16,0,0.01931200052301089
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,fp8,0,0.021210665504137676
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,float16,0,0.027285332481066387
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,float16,0,0.012991999586423239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,fp8,0,0.013306666165590286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,float16,0,0.01534933348496755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,fp8,0,0.04378133515516917
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,float16,0,0.01312000056107839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,fp8,0,0.019130667050679524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,fp8,0,0.014058666924635569
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,float16,0,0.01414399966597557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,fp8,0,0.029487999776999157
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,float16,0,0.013151999562978745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,fp8,0,0.01926400015751521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,fp8,0,0.023402666052182514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,fp8,0,0.013242666920026144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,float16,0,0.013093333691358566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,float16,0,0.012991999586423239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,float16,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,float16,0,0.015082667271296183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,fp8,0,0.013290667285521826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,float16,0,0.3290453354517619
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,float16,0,0.017818666994571686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,fp8,0,0.013386666774749756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,float16,0,0.34649066130320233
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,float16,0,0.430735985438029
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,fp8,0,0.35117868582407635
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,fp8,0,0.4528319835662842
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,float16,0,0.42320001125335693
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,float16,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,fp8,0,0.424074649810791
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,float16,0,0.17811199029286703
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,float16,0,0.32076799869537354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,float16,0,0.2200266718864441
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,fp8,0,0.1739573280016581
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,fp8,0,0.18359466393788657
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,float16,0,0.22592000166575113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,fp8,0,0.22914665937423706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,float16,0,0.1793066660563151
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,fp8,0,0.18999467293421426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,float16,0,0.10097066561381023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,float16,0,0.1856266657511393
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,fp8,0,0.09079999725023906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,fp8,0,0.09674666325251262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,fp8,0,0.11400000254313152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,float16,0,0.11683733264605205
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,fp8,0,0.11595732967058818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,float16,0,0.052517334620157875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,fp8,0,0.32891199986139935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,fp8,0,0.05128000179926554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,fp8,0,0.363754669825236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,float16,0,0.05388799806435903
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,fp8,0,0.053360000252723694
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,fp8,0,0.060138667623202004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,float16,0,0.05916800101598104
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,float16,0,0.05975999931494395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,fp8,0,0.2290133237838745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,float16,0,0.047055999437967934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,float16,0,0.03138133386770884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,float16,0,0.09541333715120952
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,float16,0,0.03345600018898646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,fp8,0,0.031504000226656594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,float16,0,0.03570133447647095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,float16,0,0.11683199803034465
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,fp8,0,0.06154666841030121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,fp8,0,0.03664533297220866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,fp8,0,0.05123733480771383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,fp8,0,0.1011306643486023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,fp8,0,0.03268799930810928
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,float16,0,0.02533866713444392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,fp8,0,0.035536001125971474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,fp8,0,0.03146666785081228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,fp8,0,0.020746666938066483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,float16,0,0.021530665457248688
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,float16,0,0.02186133215824763
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,fp8,0,0.023344000180562336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,float16,0,0.09620267152786255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,float16,0,0.017562666287024815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,fp8,0,0.020234666764736176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,float16,0,0.021151999632517498
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,float16,0,0.015392000476519266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,float16,0,0.015471999843915304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,float16,0,0.015749332805474598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,fp8,0,0.02117866774400075
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,float16,0,0.021242665747801464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,float16,0,0.01292266696691513
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,fp8,0,0.01616000011563301
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,fp8,0,0.013199999928474426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,float16,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,float16,0,0.013141332815090815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,fp8,0,0.013232000172138214
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,float16,0,0.01314666618903478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,fp8,0,0.015392000476519266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,float16,0,0.035455999275048576
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,fp8,0,0.02126399924357732
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,float16,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,float16,0,0.011365332951148352
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,float16,0,0.012240000069141388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,float16,0,0.015909332782030106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,float16,0,0.28440000613530475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,float16,0,0.01209066684047381
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,fp8,0,0.28019734223683673
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,float16,0,0.2914346655209859
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,fp8,0,0.2897119919459025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,float16,0,0.3266986608505249
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,float16,0,0.33442668120066327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,float16,0,0.013850666582584381
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,fp8,0,0.33178667227427167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,float16,0,0.22866666316986084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,float16,0,0.15434666474660239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,fp8,0,0.24394667148590088
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,fp8,0,0.1461120049158732
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,float16,0,0.17357865969340006
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,fp8,0,0.17162134250005087
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,float16,0,0.17249067624409994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,float16,0,0.08118399977684021
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,float16,0,0.12390399972597758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,fp8,0,0.12966932853062949
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,float16,0,0.16008533040682474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,fp8,0,0.078575998544693
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,float16,0,0.08361599842707317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,fp8,0,0.08666132887204488
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,fp8,0,0.33053867022196454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,float16,0,0.09009599685668945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,float16,0,0.013023999830087027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,fp8,0,0.08714666962623596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,float16,0,0.04577599962552389
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,fp8,0,0.06454400221506755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,float16,0,0.047135998805363975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,float16,0,0.06486399968465169
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,float16,0,0.04941866795221964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,fp8,0,0.04987733562787374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,fp8,0,0.08025066554546356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,float16,0,0.04994133114814758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,fp8,0,0.17494400342305502
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,fp8,0,0.037989333271980286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,float16,0,0.03378133227427801
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,fp8,0,0.04394133388996124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,float16,0,0.029482667644818623
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,fp8,0,0.02827200045188268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,float16,0,0.02956799914439519
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,fp8,0,0.0459146648645401
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,fp8,0,0.029994666576385498
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,fp8,0,0.1516480048497518
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,fp8,0,0.04930666585763296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,float16,0,0.01916266605257988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,float16,0,0.021327999730904896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,float16,0,0.01941866676012675
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,fp8,0,0.02752533306678136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,float16,0,0.029440000653266907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,float16,0,0.019317333896954853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,float16,0,0.019146667172511418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,fp8,0,0.01934933289885521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,float16,0,0.08962666988372803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,fp8,0,0.029733332494894665
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,float16,0,0.015098666151364645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,fp8,0,0.01728533332546552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,fp8,0,0.023605334262053173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,float16,0,0.015210667004187902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,float16,0,0.015146666516860327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,fp8,0,0.019178666174411774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,float16,0,0.015226667126019796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,fp8,0,0.015279999623696009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,float16,0,0.029872000217437744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,float16,0,0.017258666455745697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,float16,0,0.011306667079528173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,float16,0,0.011301333705584208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,fp8,0,0.015263999501864115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,float16,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,fp8,0,0.0129120002190272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,float16,0,0.011717333147923151
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,float16,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,float16,0,0.01303999995191892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,float16,0,0.012234666695197424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,float16,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,float16,0,0.011221333096424738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,fp8,0,0.011690666278203329
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,fp8,0,0.25219200054804486
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,fp8,0,0.26077866554260254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,fp8,0,0.011973333855470022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,float16,0,0.29258133967717487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,fp8,0,0.28043200572331745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,fp8,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,float16,0,0.27137066920598346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,float16,0,0.14325333635012308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,float16,0,0.2954453428586324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,fp8,0,0.1328159968058268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,fp8,0,0.13562666376431784
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,float16,0,0.14990933736165366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,fp8,0,0.1418400009473165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,float16,0,0.2748639980951945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,fp8,0,0.281658669312795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,fp8,0,0.14358400305112204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,fp8,0,0.1822986602783203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,fp8,0,0.09630399942398071
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,float16,0,0.09775466720263164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,fp8,0,0.07052800059318542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,float16,0,0.07987200220425923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,fp8,0,0.07251200079917908
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,float16,0,0.080335999528567
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,fp8,0,0.07658133407433827
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,float16,0,0.08028266827265422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,float16,0,0.156058669090271
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,fp8,0,0.07543466488520305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,fp8,0,0.041434665520985924
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,fp8,0,0.052005335688591
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,float16,0,0.07890133559703827
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,float16,0,0.04393066465854645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,fp8,0,0.04171733558177948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,float16,0,0.047770669062932335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,fp8,0,0.04385066529115041
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,fp8,0,0.04428799947102865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,float16,0,0.1855199933052063
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,float16,0,0.030005333324273426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,float16,0,0.04580800235271454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,float16,0,0.027834666272004444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,fp8,0,0.032111999889214836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,fp8,0,0.027514666318893433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,float16,0,0.04586133360862732
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,fp8,0,0.027274665733178455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,float16,0,0.027424000203609467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,float16,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,float16,0,0.02090666691462199
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,float16,0,0.019248000035683315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,fp8,0,0.021327999730904896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,fp8,0,0.017877332866191864
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,float16,0,0.027402666707833607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,float16,0,0.019248000035683315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,fp8,0,0.017743999759356182
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,float16,0,0.019141333798567455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,fp8,0,0.026922665536403656
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,float16,0,0.016000000139077503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,float16,0,0.145797332127889
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,fp8,0,0.02661866694688797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,fp8,0,0.01739199956258138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,float16,0,0.014965333044528961
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,fp8,0,0.01811733345190684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,float16,0,0.019194666296243668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,float16,0,0.015168000012636185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,float16,0,0.05221333106358846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,float16,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,float16,0,0.015024000157912573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,fp8,0,0.012026666353146235
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,float16,0,0.01157333329319954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,fp8,0,0.01746133342385292
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,fp8,0,0.01129066695769628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,float16,0,0.015013333410024643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,float16,0,0.010965333630641302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,fp8,0,0.012448000411192576
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,fp8,0,0.012367999802033106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,float16,0,0.011205332974592844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,float16,0,0.027669332921504974
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,float16,0,0.013066666821638743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,fp8,0,0.01129066695769628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,float16,0,0.011071999867757162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,float16,0,0.01091733326514562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,fp8,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,fp8,0,0.015216000378131866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,1,128,1,float16,float16,0,0.015024000157912573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,1,128,1,float16,fp8,0,0.013242666920026144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,0,0.025221332907676697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,0,0.02117866774400075
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,0,0.029445332785447437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,2,128,1,float16,fp8,0,0.01960533360640208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,float16,0,0.011296000331640244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,0,0.026133333643277485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,1,128,1,float16,float16,0,0.011343999455372492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,1,128,1,float16,fp8,0,0.01116266722480456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,float16,0,0.08235733211040497
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,2,128,1,float16,fp8,0,0.0138026662170887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,0,0.016783999900023144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,0,0.019189332922299702
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,0,0.017029333859682083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,1,128,1,float16,float16,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,2,128,1,float16,float16,0,0.021231998999913532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,fp8,0,0.03583999971548716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,1,128,1,float16,fp8,0,0.011168000598748526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,2,128,1,float16,float16,0,0.011370666325092316
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,fp8,0,0.06528000036875407
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,0,0.01322666679819425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,0,0.015274666249752045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,1,128,1,float16,float16,0,0.010938666760921478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,float16,0,0.025578667720158894
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,1,128,1,float16,fp8,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,2,128,1,float16,fp8,0,0.011407999942700068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,0,0.011152000476916632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,0,0.015061333775520325
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,fp8,0,0.023413332800070446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,1,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,1,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,2,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,2,128,1,float16,float16,0,0.011130666981140772
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,2,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,0,0.009114666531483332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,0,0.009082666908701261
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,0,0.009152000149091085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,float16,0,0.04715733230113983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,float16,0,0.01728533332546552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,1,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,1,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,2,128,1,float16,float16,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,2,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,0,0.0107893335322539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,0,0.009328000247478485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,0,0.009279999881982803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,2,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,2,128,1,float16,float16,0,0.015087999403476715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,float16,0,0.013162666310866674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,float16,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,1,128,1,float16,fp8,0,0.010597333312034607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,0,0.00961599995692571
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,2,128,1,float16,fp8,0,0.011215999722480774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,0,0.010048000141978264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,0,0.009045333291093508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,fp8,0,0.01830400029818217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,0,0.00921066664159298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,float16,0,0.010981333752473196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,1,128,1,float16,float16,0,0.010879999647537867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,fp8,0,0.01349866638580958
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,0,0.008949333180983862
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,0,0.009237333511312803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,0,0.00897066667675972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,0,0.00949866697192192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,0,0.009226666763424873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,0,0.009573333586255709
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,1,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,2,128,1,float16,float16,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,2,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,2,128,1,float16,float16,0,0.010911999891201654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,2,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,0,0.009359999870260557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,0,0.010389333590865135
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,0,0.00916800027092298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,1,128,1,float16,float16,0,0.010944000134865442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,float16,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,fp8,0,0.012442667037248611
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,1,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,float16,0,0.010944000134865442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,fp8,0,14.330650329589844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,fp8,0,14.395125071207682
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,2,128,1,float16,float16,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,float16,0,19.389541625976562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,float16,0,19.519775390625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,float16,0,20.100303649902344
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,fp8,0,14.970228830973307
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,fp8,0,7.139024098714192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,fp8,0,8.374863942464193
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,float16,0,9.67420768737793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,float16,0,10.46116828918457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,float16,0,20.08295440673828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,float16,0,9.71066157023112
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,fp8,0,7.182181040445964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,float16,0,4.975786526997884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,fp8,0,4.3761599858601885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,float16,0,10.214410781860352
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,fp8,0,7.443029403686523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,fp8,0,3.7141812642415366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,float16,0,4.474368095397949
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,float16,0,4.654213269551595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,float16,0,4.491685231526692
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,fp8,0,14.866133371988932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,fp8,0,3.7185866038004556
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,float16,0,4.857114791870117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,fp8,0,3.885093371073405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,float16,0,2.5871307055155435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,fp8,0,2.1954399744669595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,float16,0,2.244138717651367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,fp8,0,2.0340479214986167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,fp8,0,7.366432189941406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,float16,0,2.308154741923014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,fp8,0,2.1893332799275718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,float16,0,2.3215786616007485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,float16,0,10.246458689371744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,float16,0,2.3665760358174643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,fp8,0,1.93503999710083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,fp8,0,3.9748051961263022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,fp8,0,2.0974666277567544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,fp8,0,8.285941441853842
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,fp8,0,8.351114908854166
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,float16,0,11.041605631510416
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,float16,0,11.382512410481771
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,fp8,0,8.600197474161783
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,float16,0,11.597941080729166
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,float16,0,11.590340932210287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,fp8,0,4.268762588500977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,fp8,0,8.560149510701498
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,float16,0,5.139818509419759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,fp8,0,4.324997266133626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,float16,0,5.491397221883138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,float16,0,5.4336903889973955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,fp8,0,4.594367980957031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,fp8,0,4.39082145690918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,float16,0,2.9687201182047525
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,fp8,0,2.0824000040690103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,float16,0,2.8472798665364585
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,float16,0,2.615253289540609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,fp8,0,2.201039950052897
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,fp8,0,2.348677317301432
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,float16,0,2.6789067586263022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,fp8,0,5.137184143066406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,fp8,0,2.226842721303304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,float16,0,2.722405433654785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,float16,0,1.3185706933339436
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,fp8,0,1.0899466673533122
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,float16,0,1.3536319732666016
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,fp8,0,1.1107146739959717
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,float16,0,1.3742027282714844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,fp8,0,1.2205440203348796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,float16,0,5.109477361043294
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,float16,0,1.5130666097005208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,fp8,0,3.1215413411458335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,float16,0,7.735205332438151
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,fp8,0,5.775487899780273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,float16,0,6.052778879801433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,fp8,0,5.852607727050781
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,fp8,0,1.3900052706400554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,float16,0,7.848682403564453
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,fp8,0,6.142576217651367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,float16,0,7.7794984181722
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,float16,0,1.3829387029012044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,float16,0,7.968218485514323
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,fp8,0,6.229951858520508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,fp8,0,4.05782413482666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,float16,0,3.6193599700927734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,fp8,0,3.1214186350504556
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,float16,0,3.606656074523926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,fp8,0,3.4331251780192056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,float16,0,3.8376906712849936
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,fp8,0,3.1290718714396157
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,fp8,0,1.1723199685414631
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,float16,0,3.876288096110026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,fp8,0,3.1514879862467446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,float16,0,2.1656106313069663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,float16,0,1.8060213724772136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,fp8,0,1.490005334218343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,float16,0,4.232373237609863
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,fp8,0,1.5148693720499675
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,float16,0,1.937162717183431
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,float16,0,1.9001760482788086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,fp8,0,1.6377439498901367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,float16,0,0.9375946521759033
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,fp8,0,0.7889760335286459
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,fp8,0,1.019274632136027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,float16,0,0.9766879876454672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,float16,0,0.9929973284403483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,fp8,0,2.024346669514974
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,float16,0,1.9241493542989094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,fp8,0,0.860309362411499
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,fp8,0,1.8775466283162434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,float16,0,1.124288002649943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,fp8,0,0.8165919780731201
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,fp8,0,0.8524426619211832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,float16,0,1.0172906716664631
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,fp8,0,7.48524792989095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,float16,0,10.35429318745931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,fp8,0,7.764853159586589
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,float16,0,10.565770467122396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,float16,0,10.90552012125651
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,fp8,0,8.404720306396484
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,float16,0,4.892858823140462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,float16,0,5.839712142944336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,fp8,0,8.392639795939127
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,float16,0,10.922682444254557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,fp8,0,3.855589230855306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,float16,0,4.898533185323079
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,fp8,0,4.385354677836101
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,float16,0,5.079392115275065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,fp8,0,3.918095906575521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,fp8,0,4.358933448791504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,float16,0,2.8727146784464517
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,float16,0,2.3876214027404785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,fp8,0,2.2578345934549966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,float16,0,2.3662400245666504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,fp8,0,2.1736532847086587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,float16,0,5.327578544616699
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,float16,0,2.5817333857218423
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,fp8,0,2.8781439463297525
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,float16,0,1.3013439973195393
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,fp8,0,1.3624746004740398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,fp8,0,0.9984532992045084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,fp8,0,1.9765332539876301
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,fp8,0,1.0247413317362468
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,float16,0,2.5070133209228516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,float16,0,1.311637322107951
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,fp8,0,2.281770706176758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,fp8,0,1.1000266869862874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,float16,0,1.49618132909139
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,fp8,0,0.7195626894632975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,fp8,0,0.5399519999821981
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,float16,0,1.24944003423055
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,float16,0,0.6346026659011841
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,float16,0,0.6856213410695394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,fp8,0,1.1001280148824055
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,fp8,0,0.5954986810684204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,float16,0,1.2833493550618489
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,fp8,0,0.590768019358317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,fp8,0,5.349114735921224
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,fp8,0,0.5488320191701254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,float16,0,5.720160166422526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,float16,0,5.577765146891276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,fp8,0,4.557733217875163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,float16,0,0.6877013047536215
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,float16,0,0.6235040028889974
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,float16,0,6.270666758219401
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,fp8,0,5.047583897908528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,fp8,0,4.434639930725098
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,fp8,0,5.01089604695638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,float16,0,3.6199680964152017
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,fp8,0,3.3275734583536782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,fp8,0,2.251194636027018
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,float16,0,0.7718719641367594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,fp8,0,2.316271940867106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,fp8,0,2.5848800341288247
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,float16,0,3.0740534464518228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,float16,0,2.9619201024373374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,fp8,0,2.5448586146036782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,float16,0,6.186176300048828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,float16,0,1.7819679578145344
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,fp8,0,1.693717320760091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,float16,0,2.7672907511393228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,float16,0,2.749765396118164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,float16,0,1.508453369140625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,fp8,0,1.3043786684672039
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,float16,0,1.5375626881917317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,fp8,0,1.3120160102844238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,float16,0,0.7190879980723063
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,fp8,0,0.60807998975118
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,float16,0,0.9124746322631836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,float16,0,0.7316746711730957
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,fp8,0,1.2685173352559407
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,fp8,0,0.6258666515350342
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,fp8,0,1.1866880257924397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,fp8,0,0.6885279814402262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,float16,0,0.7741599877675375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,fp8,0,0.6974186897277832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,float16,0,0.3805653254191081
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,fp8,0,0.4726560115814209
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,fp8,0,0.33291733264923096
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,float16,0,1.4169386227925618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,fp8,0,0.33898667494455975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,float16,0,1.4281919797261555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,float16,0,0.418720006942749
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,fp8,0,0.37673600514729816
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,float16,0,0.4174400170644124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,fp8,0,0.3838293155034383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,float16,0,0.48442665735880536
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,fp8,0,0.8782666524251302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,float16,0,0.38497066497802734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,float16,0,5.387231826782227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,fp8,0,4.45525328318278
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,float16,0,5.575018564860026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,float16,0,5.999909083048503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,float16,0,5.961925506591797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,float16,0,3.646671930948893
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,fp8,0,3.763941446940104
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,float16,0,0.792458693186442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,float16,0,2.556719938913981
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,fp8,0,2.2805919647216797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,float16,0,2.627120018005371
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,fp8,0,2.2546133995056152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,float16,0,3.018218676249186
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,fp8,0,5.217013359069824
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,fp8,0,2.58785072962443
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,fp8,0,5.188261349995931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,float16,0,1.2878186702728271
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,fp8,0,1.1123092969258626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,fp8,0,1.1481119791666667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,float16,0,1.4666666984558105
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,fp8,0,1.2941493193308513
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,float16,0,1.5000319480895996
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,fp8,0,4.288026809692383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,fp8,0,2.71833070119222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,float16,0,1.8839252789815266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,float16,0,0.6627039909362793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,float16,0,1.336330731709798
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,fp8,0,0.577834685643514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,fp8,0,0.9360746542612711
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,float16,0,0.6766613324483236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,fp8,0,0.7043146292368571
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,float16,0,0.7428320248921713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,fp8,0,1.3043786684672039
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,float16,0,2.9720961252848306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,float16,0,0.49379201730092365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,float16,0,0.3511679967244466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,float16,0,0.9341119925181071
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,float16,0,0.3589013417561849
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,fp8,0,0.5992586612701416
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,float16,0,0.39236799875895184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,float16,0,0.7778666814168295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,fp8,0,0.36872533957163495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,float16,0,0.40157334009806317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,fp8,0,0.37619201342264813
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,float16,0,0.19612799088160196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,float16,0,0.2764853239059448
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,fp8,0,0.27131199836730957
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,fp8,0,0.49439998467763263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,float16,0,0.20109866062800089
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,float16,0,0.21705599625905356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,fp8,0,0.31539199749628705
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,fp8,0,0.19983466466267905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,float16,0,0.22188800573349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,fp8,0,0.20152000586191812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,fp8,0,0.6867093245188395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,fp8,0,1.8841867446899414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,float16,0,3.066234588623047
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,float16,0,3.138549486796061
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,fp8,0,0.1853440006573995
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,fp8,0,2.7473014195760093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,fp8,0,0.32549866040547687
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,float16,0,3.7075627644856772
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,fp8,0,3.22267214457194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,fp8,0,3.2054773966471353
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,fp8,0,2.626784006754557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,float16,0,1.58625062306722
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,fp8,0,1.3976853688557942
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,fp8,0,1.6767466862996419
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,float16,0,2.382970650990804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,fp8,0,2.4098079999287925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,float16,0,1.788442611694336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,fp8,0,1.6343413988749187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,float16,0,1.1884640057881672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,float16,0,0.789568026860555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,float16,0,1.806890646616618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,fp8,0,0.6879893143971761
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,float16,0,0.8158026536305746
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,fp8,0,1.3389066060384114
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,float16,0,0.9152373472849528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,fp8,0,0.8435146808624268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,float16,0,0.9117013613382975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,fp8,0,0.8591732978820801
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,fp8,0,1.2282880147298176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,float16,0,0.6091786623001099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,float16,0,0.40476266543070477
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,fp8,0,0.6307146549224854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,float16,0,3.566080093383789
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,fp8,0,0.18068800369898477
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,fp8,0,0.36402666568756104
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,fp8,0,0.38122665882110596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,float16,0,0.4777493476867676
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,fp8,0,0.4524906476338704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,fp8,0,0.44190935293833417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,float16,0,0.4774080117543538
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,float16,0,0.22098666429519653
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,float16,0,0.3236266573270162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,float16,0,0.22781866788864136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,float16,0,0.2543413241704305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,float16,0,1.5746879577636719
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,fp8,0,0.23667732874552408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,float16,0,0.42207467555999756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,float16,0,0.25886933008829754
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,fp8,0,0.2474666635195414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,float16,0,0.12749866644541422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,float16,0,0.1907893419265747
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,fp8,0,0.11778666575749715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,fp8,0,0.20147732893625894
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,fp8,0,0.12345600128173828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,float16,0,0.140255997578303
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,fp8,0,0.7185119787851969
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,fp8,0,0.13544000188509622
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,fp8,0,0.1896053353945414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,fp8,0,0.33658134937286377
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,float16,0,0.13421333829561868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,fp8,0,2.6763038635253906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,float16,0,0.1420906682809194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,float16,0,3.214319864908854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,fp8,0,2.8458986282348633
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,float16,0,3.817978541056315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,fp8,0,3.2951253255208335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,float16,0,3.0188318888346353
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,float16,0,3.732858657836914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,fp8,0,0.13295466701189676
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,fp8,0,3.439397176106771
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,fp8,0,1.3562080065409343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,fp8,0,2.806096076965332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,float16,0,1.5909226735432942
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,fp8,0,1.4371733665466309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,float16,0,1.9497119585673015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,fp8,0,1.740506649017334
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,float16,0,1.8663147290547688
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,fp8,0,1.799242655436198
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,float16,0,0.7686826388041178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,fp8,0,0.20985599358876547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,fp8,0,0.6948959827423096
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,fp8,0,0.7383413314819336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,float16,0,2.656826655069987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,fp8,0,0.8751839796702067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,float16,0,0.9471786816914877
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,fp8,0,0.9028480052947998
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,float16,0,0.4007466634114583
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,fp8,0,1.4091839790344238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,float16,0,1.5185386339823406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,fp8,0,0.3641279935836792
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,float16,0,0.9500053723653158
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,fp8,0,0.38553067048390705
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,float16,0,0.5033973455429077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,fp8,0,0.4602293173472087
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,fp8,0,0.4623946746190389
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,float16,0,0.3547840118408203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,fp8,0,0.3772746721903483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,float16,0,0.6677599747975668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,float16,0,0.8095146814982096
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,fp8,0,0.7196053663889567
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,fp8,0,0.20177066326141357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,float16,0,0.41702934106191
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,float16,0,0.21773332357406616
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,float16,0,0.26290132602055866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,fp8,0,0.24783466259638467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,float16,0,0.2663946747779846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,float16,0,0.49001598358154297
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,float16,0,0.12106666962305705
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,float16,0,0.20913066466649374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,float16,0,0.12397332986195882
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,fp8,0,0.11732799808184306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,float16,0,0.13940800229708353
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,fp8,0,0.1336373289426168
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,float16,0,0.1446613371372223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,float16,0,0.22873600323994955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,fp8,0,0.25466134150822956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,float16,0,0.07609599828720093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,fp8,0,0.11019733548164368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,float16,0,0.07646400233109792
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,fp8,0,0.07067733506361644
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,float16,0,1.3247466882069905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,float16,0,0.08193600177764893
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,fp8,0,0.07865066826343536
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,float16,0,0.08190399905045827
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,fp8,0,0.21043733755747476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,fp8,0,0.07959466675917308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,float16,0,0.11896000305811565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,fp8,0,0.06861866513888042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,float16,0,1.8689866065979004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,fp8,0,0.11301866173744202
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,fp8,0,1.7108640670776367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,float16,0,2.0220160484313965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,fp8,0,1.8403306007385254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,fp8,0,0.13406399885813394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,float16,0,2.4311413764953613
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,fp8,0,2.3058133125305176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,fp8,0,0.2038080096244812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,float16,0,0.960256020228068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,fp8,0,1.9355252583821614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,float16,0,1.7939252853393555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,fp8,0,0.9292053381601969
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,float16,0,0.9886879920959473
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,float16,0,2.443589369455973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,fp8,0,2.256901264190674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,fp8,0,1.242250680923462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,float16,0,1.250597318013509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,float16,0,0.49273598194122314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,fp8,0,0.4505813519159953
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,float16,0,0.8913386662801107
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,float16,0,0.5108480056126913
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,float16,0,0.6283520062764486
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,fp8,0,0.8721919854482015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,fp8,0,0.6092053254445394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,fp8,0,0.6024746497472128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,float16,0,0.6320639848709106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,float16,0,1.207055966059367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,float16,0,0.255349338054657
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,float16,0,0.45866668224334717
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,fp8,0,0.505898674329122
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,float16,0,0.2675146659215291
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,fp8,0,0.25783467292785645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,fp8,0,0.3245866696039836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,fp8,0,0.31776533524195355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,fp8,0,0.47917866706848145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,float16,0,0.1436746617158254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,fp8,0,0.1318666636943817
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,float16,0,0.1511786679426829
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,fp8,0,0.24172266324361166
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,fp8,0,0.13793067137400308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,fp8,0,1.1870453357696533
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,fp8,0,0.16731733083724976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,float16,0,0.33531733353932697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,float16,0,0.32418666283289593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,fp8,0,0.17498133579889932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,float16,0,0.08306133250395457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,float16,0,0.15081600348154703
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,fp8,0,0.1487573285897573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,float16,0,0.08563733100891113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,float16,0,0.09263466795285542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,float16,0,0.1757919987042745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,fp8,0,0.09307199716567993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,float16,0,0.0962559978167216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,fp8,0,0.09307199716567993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,fp8,0,0.07914666831493378
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,fp8,0,0.2662880023320516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,float16,0,0.05417066812515259
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,fp8,0,0.08266133566697438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,float16,0,0.055946667989095054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,fp8,0,0.053114667534828186
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,float16,0,0.059605335195859276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,fp8,0,0.058677335580190025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,float16,0,0.180351992448171
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,float16,0,0.06164266665776571
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,float16,0,0.08342400193214417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,float16,0,0.2618666688601176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,float16,0,1.9541172981262207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,fp8,0,0.05190933247407278
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,float16,0,2.1317386627197266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,fp8,0,0.9814399878184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,fp8,0,0.059445331494013466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,fp8,0,2.023973306020101
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,float16,0,2.7028481165568032
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,fp8,0,2.7930453618367515
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,fp8,0,1.8617067337036133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,fp8,0,2.6200265884399414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,float16,0,2.080458641052246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,float16,0,0.9891786575317383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,fp8,0,2.354778607686361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,fp8,0,1.0305279890696208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,float16,0,1.395583947499593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,fp8,0,1.3734347025553386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,float16,0,1.353866736094157
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,fp8,0,1.4077760378519695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,float16,0,0.5121546586354574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,float16,0,2.656912008921305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,float16,0,0.5457013448079427
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,fp8,0,0.523904005686442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,float16,0,1.0502453645070393
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,float16,0,0.7031199932098389
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,fp8,0,0.6791199843088785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,fp8,0,0.07815999786059062
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,fp8,0,0.6764693260192871
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,fp8,0,0.607754667599996
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,fp8,0,0.25774399439493817
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,fp8,0,1.1898720264434814
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,float16,0,0.28301332394282025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,float16,0,0.718069314956665
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,float16,0,0.36908801396687824
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,float16,0,0.5378506580988566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,fp8,0,0.3810453414916992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,fp8,0,0.48494935035705566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,float16,0,0.3676266670227051
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,fp8,0,0.2750613292058309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,float16,0,0.2799893418947856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,float16,0,0.14913066228230795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,fp8,0,0.14443199833234152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,float16,0,0.1584160029888153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,fp8,0,0.15316266814867655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,fp8,0,0.19073599576950073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,float16,0,1.080624024073283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,fp8,0,0.3591466744740804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,fp8,0,0.31700267394383747
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,float16,0,0.08403733372688293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,fp8,0,0.07826133569081624
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,float16,0,0.26900800069173175
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,float16,0,0.08746133248011272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,fp8,0,0.08328533172607422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,float16,0,0.19341333707173666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,float16,0,0.09872532884279887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,fp8,0,0.20134933789571127
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,float16,0,0.10617599884668986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,fp8,0,0.09977066516876221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,float16,0,0.049973333875338234
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,float16,0,0.15195199847221375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,float16,0,0.0925439993540446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,fp8,0,0.08886399865150452
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,float16,0,0.05197866757710775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,fp8,0,0.049786667029062905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,float16,0,0.05668266614278158
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,fp8,0,0.057818666100502014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,fp8,0,0.05839466551939646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,fp8,0,0.9461332956949869
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,fp8,0,0.09824533263842265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,float16,0,0.040250666439533234
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,fp8,0,0.03416533271471659
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,float16,0,0.037765334049860634
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,fp8,0,0.1692426602045695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,fp8,0,0.047509332497914634
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,fp8,0,0.03979733337958654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,float16,0,0.19151467084884644
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,fp8,0,0.054005334774653115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,fp8,0,0.03545066714286804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,float16,0,0.03976533313592275
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,float16,0,0.05673066775004069
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,float16,0,0.045935998360315956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,fp8,0,1.6200586954752605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,float16,0,1.4606720606486003
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,float16,0,2.3187360763549805
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,fp8,0,1.4491519927978516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,fp8,0,2.376197338104248
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,float16,0,1.5959253311157227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,float16,0,0.039706667264302574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,float16,0,2.132746696472168
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,fp8,0,0.73908797899882
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,fp8,0,2.2091520627339682
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,float16,0,1.8335679372151692
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,float16,0,0.8093547026316324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,float16,0,1.1213013331095378
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,fp8,0,0.816309372584025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,fp8,0,2.1386826833089194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,float16,0,0.7401119867960612
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,float16,0,0.3848746617635091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,fp8,0,0.3784799973169963
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,fp8,0,1.0791733264923096
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,float16,0,0.42448000113169354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,fp8,0,0.4185226758321126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,float16,0,0.564789334932963
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,float16,0,0.5577439864476522
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,fp8,0,0.04013866682847341
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,fp8,0,0.6218453248341879
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,float16,0,0.20376000801722208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,fp8,0,0.2000853419303894
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,fp8,0,0.5687199831008911
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,float16,0,0.21897067626317343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,fp8,0,1.1544000307718914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,fp8,0,0.22313600778579712
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,float16,0,0.9189813137054443
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,float16,0,0.3064853350321452
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,float16,0,0.2872106631596883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,fp8,0,0.3095146616299947
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,fp8,0,0.5514346758524576
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,float16,0,0.24683733781178793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,fp8,0,0.2871519923210144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,fp8,0,0.11144000291824341
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,float16,0,0.12146666646003723
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,fp8,0,0.12179199854532878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,float16,0,1.0842613379160564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,fp8,0,0.15843733151753744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,float16,0,0.16580800215403238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,fp8,0,0.29893332719802856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,float16,0,0.4700640042622884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,float16,0,0.13277332981427512
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,float16,0,0.11500799655914307
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,fp8,0,0.061978667974472046
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,fp8,0,0.06723199784755707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,float16,0,0.08077866832415263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,fp8,0,1.2188533147176106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,fp8,0,0.08180266618728638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,float16,0,0.0884320040543874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,fp8,0,0.16184533635775247
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,float16,0,0.06458133459091187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,fp8,0,0.07911466558774312
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,float16,0,0.07396799822648366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,float16,0,0.039664000272750854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,float16,0,0.06903466582298279
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,float16,0,0.041759997606277466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,float16,0,0.04683200021584829
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,float16,0,0.16157866517702738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,fp8,0,0.04808000226815542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,fp8,0,0.05005866785844167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,fp8,0,0.08554133772850037
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,float16,0,0.03958933303753535
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,float16,0,0.029578665892283123
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,fp8,0,0.1528426706790924
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,float16,0,0.029546665648619335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,fp8,0,0.03961600114901861
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,float16,0,0.03299733251333237
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,fp8,0,0.04092800120512644
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,fp8,0,0.03357866654793421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,float16,0,0.046949331959088646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,float16,0,0.025909334421157837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,fp8,0,0.021104000508785248
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,fp8,0,0.0284853329261144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,fp8,0,0.021354667842388153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,fp8,0,0.0306986669699351
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,float16,0,0.03362133353948593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,fp8,0,0.023306667804718018
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,float16,0,0.0233599990606308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,fp8,0,0.023418667415777843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,fp8,0,0.03014400104681651
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,fp8,0,0.04807466765244802
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,float16,0,0.021125334004561108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,fp8,0,0.6260266701380411
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,float16,0,0.6843093236287435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,float16,0,0.021898667017618816
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,fp8,0,0.033743999898433685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,fp8,0,0.7115733623504639
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,float16,0,0.9926239649454752
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,fp8,0,0.948527971903483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,float16,0,1.0359893639882405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,float16,0,0.6153440078099569
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,fp8,0,1.0843040148417156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,float16,0,0.31853334108988446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,fp8,0,1.0206879774729412
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,fp8,0,0.36108267307281494
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,float16,0,0.3643466631571452
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,float16,0,0.021114667256673176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,fp8,0,0.5258080164591471
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,float16,0,0.8495573202768961
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,float16,0,0.4343786636988322
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,fp8,0,0.3232640027999878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,fp8,0,0.17360534270604452
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,fp8,0,0.19729065895080566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,float16,0,0.19670399030049643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,fp8,0,0.49243199825286865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,float16,0,0.5081866582234701
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,float16,0,0.5277546644210815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,float16,0,0.2546773354212443
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,fp8,0,0.2791573405265808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,float16,0,0.09559999903043111
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,float16,0,0.2267306645711263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,float16,0,0.17229332526524863
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,fp8,0,0.10589866836865743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,float16,0,0.13797332843144736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,fp8,0,0.142767995595932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,float16,0,0.14429333806037903
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,fp8,0,0.1527839998404185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,float16,0,0.12380799651145935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,fp8,0,0.09326400359471639
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,fp8,0,0.14446933070818582
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,fp8,0,0.05302399893601736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,float16,0,0.05795200169086456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,fp8,0,0.05811200042565664
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,float16,0,0.07135466734568278
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,float16,0,0.27245867252349854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,fp8,0,0.2666826645533244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,fp8,0,0.07444799939791362
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,fp8,0,0.07454933226108551
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,float16,0,0.03513066718975703
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,float16,0,0.06891733407974243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,fp8,0,0.034645333886146545
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,float16,0,0.10212266445159912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,fp8,0,0.03608000030120214
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,fp8,0,0.04380266865094503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,float16,0,0.041834667325019836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,float16,0,0.07789866626262665
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,fp8,0,0.07439466814200084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,fp8,0,0.5201173226038615
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,float16,0,0.05433600147565206
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,float16,0,0.04160533348719279
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,fp8,0,0.04445866743723551
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,float16,0,0.027434666951497395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,float16,0,0.029552000264326733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,fp8,0,0.029445332785447437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,fp8,0,0.04428799947102865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,float16,0,0.02938133229811986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,fp8,0,0.031013332307338715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,float16,0,0.03566933423280716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,fp8,0,0.26895467440287274
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,float16,0,0.025402667621771496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,float16,0,0.017370666066805523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,fp8,0,0.02736533433198929
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,float16,0,0.022687998910744984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,fp8,0,0.02734400083621343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,fp8,0,0.01738133281469345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,fp8,0,0.01918399954835574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,float16,0,0.019039999693632126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,fp8,0,0.01950399950146675
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,float16,0,0.035818666219711304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,fp8,0,0.021514666577180225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,fp8,0,0.025306666890780132
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,float16,0,0.017184000462293625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,float16,0,0.01729600007335345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,fp8,0,0.017450666675964992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,float16,0,0.017349333812793095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,fp8,0,0.017269333203633625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,float16,0,0.019130667050679524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,fp8,0,0.376911997795105
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,float16,0,0.017312000195185345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,fp8,0,0.017418666432301205
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,fp8,0,0.017077332983414333
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,float16,0,0.01738133281469345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,float16,0,0.417248010635376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,fp8,0,0.4214773178100586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,float16,0,0.5541599988937378
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,float16,0,0.5508426825205485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,float16,0,0.37249600887298584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,float16,0,0.1970240076382955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,float16,0,0.45961066087086994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,fp8,0,0.5450559854507446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,float16,0,0.21213332811991373
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,fp8,0,0.19762667020161948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,float16,0,0.019248000035683315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,fp8,0,0.2171893318494161
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,float16,0,0.2959786653518677
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,fp8,0,0.2932426730791728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,fp8,0,0.5914080142974854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,float16,0,0.10825600226720174
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,fp8,0,0.10801066954930623
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,float16,0,0.1157866617043813
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,fp8,0,0.6231253147125244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,fp8,0,0.11700266599655151
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,fp8,0,0.1567359964052836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,float16,0,0.15944000085194907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,fp8,0,0.16660267114639282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,float16,0,0.05839466551939646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,fp8,0,0.29420266548792523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,float16,0,0.12602133552233377
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,float16,0,0.23788267374038696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,fp8,0,0.28195200363794964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,float16,0,0.062074666221936546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,fp8,0,0.06229333579540253
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,fp8,0,0.07845866680145264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,float16,0,0.08095466593901317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,fp8,0,0.0790293316046397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,float16,0,0.15647466977437338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,float16,0,0.035631999373435974
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,float16,0,0.06833600004514058
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,float16,0,0.035717333356539406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,fp8,0,0.03757333258787791
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,float16,0,0.04172799984614054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,fp8,0,0.05663466453552246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,fp8,0,0.044341335693995156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,float16,0,0.0418453315893809
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,float16,0,0.08348799745241801
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,float16,0,0.023408000667889912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,fp8,0,0.04373333354791006
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,fp8,0,0.023381332556406658
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,fp8,0,0.03548266738653183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,float16,0,0.023685333629449207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,fp8,0,0.025381334125995636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,float16,0,0.28038932879765827
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,float16,0,0.027402666707833607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,fp8,0,0.029626667499542236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,fp8,0,0.045935998360315956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,float16,0,0.02332266668478648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,float16,0,0.033520000676314034
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,float16,0,0.01926933353145917
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,float16,0,0.02739733209212621
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,fp8,0,0.02922666569550832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,float16,0,0.019141333798567455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,fp8,0,0.14816000064214072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,float16,0,0.01945066700379054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,fp8,0,0.021722666919231415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,fp8,0,0.02737066646416982
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,fp8,0,0.018357332795858383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,fp8,0,0.01933866615096728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,fp8,0,0.01515199989080429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,fp8,0,0.07625600198904674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,float16,0,0.013274667163689932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,fp8,0,0.01878400022784869
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,fp8,0,0.019914666811625164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,fp8,0,0.013557333499193192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,float16,0,0.015285332997639975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,float16,0,0.017808000246683758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,float16,0,0.013248000293970108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,fp8,0,0.013141332815090815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,float16,0,0.013183999806642532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,fp8,0,0.013327999661366144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,float16,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,float16,0,0.013295999417702356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,float16,0,0.013210666676362356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,float16,0,0.015125333021084467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,float16,0,0.2824373245239258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,fp8,0,0.28707732756932575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,float16,0,0.013050666699806849
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,float16,0,0.3885973294576009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,fp8,0,0.4095199902852376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,float16,0,0.3847626845041911
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,fp8,0,0.3811733325322469
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,float16,0,0.3017386595408122
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,float16,0,0.15563199917475382
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,fp8,0,0.15310399731000265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,fp8,0,0.3032906651496887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,float16,0,0.16275200247764587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,fp8,0,0.16158399979273477
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,float16,0,0.19697066148122153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,fp8,0,0.20722667376200357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,float16,0,0.20364266633987427
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,fp8,0,0.20973332722981772
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,fp8,0,0.07981866598129272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,fp8,0,0.16902933518091837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,float16,0,0.16299733519554138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,float16,0,0.0881173312664032
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,fp8,0,0.08544533451398213
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,fp8,0,0.10131200154622395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,fp8,0,0.3246293266614278
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,float16,0,0.10870400071144104
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,float16,0,0.2990880012512207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,float16,0,0.08713600039482117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,fp8,0,0.08568533261617024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,fp8,0,0.04587199787298838
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,float16,0,0.08296533425649007
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,float16,0,0.05232533315817515
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,float16,0,0.045850664377212524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,fp8,0,0.0543146679798762
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,float16,0,0.05338133374849955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,fp8,0,0.05481066803137461
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,float16,0,0.028773332635561626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,fp8,0,0.027402666707833607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,float16,0,0.0391839991013209
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,float16,0,0.029338667790095013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,float16,0,0.03160000095764796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,float16,0,0.031445334355036415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,fp8,0,0.0468800018231074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,float16,0,0.10389332969983418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,fp8,0,0.029461334149042766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,float16,0,0.025055999557177227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,float16,0,0.019845332950353622
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,fp8,0,0.10196266571680705
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,fp8,0,0.01950399950146675
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,float16,0,0.021301334102948506
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,float16,0,0.02123733361562093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,fp8,0,0.02162133405605952
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,float16,0,0.04780800143877665
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,fp8,0,0.02126399924357732
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,float16,0,0.0173333336909612
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,float16,0,0.015082667271296183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,fp8,0,0.019797333826621372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,float16,0,0.015098666151364645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,fp8,0,0.02111999938885371
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,fp8,0,0.015189333508412043
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,float16,0,0.015311999867359797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,fp8,0,0.015504000087579092
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,float16,0,0.013130666067202887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,float16,0,0.014096000542243322
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,fp8,0,0.01498666654030482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,float16,0,0.013114667187134424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,fp8,0,0.04796266555786133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,float16,0,0.019152000546455383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,float16,0,0.013162666310866674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,fp8,0,0.033626665671666466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,float16,0,0.01551466683546702
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,float16,0,0.012890666723251343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,fp8,0,0.015130666395028433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,float16,0,0.01328533391157786
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,float16,0,0.011727999895811081
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,float16,0,0.012015999605258306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,float16,0,0.012965332716703415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,fp8,0,0.02956266701221466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,float16,0,0.2477333347002665
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,fp8,0,0.24059200286865234
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,float16,0,0.2871840000152588
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,fp8,0,0.2882080078125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,float16,0,0.292522668838501
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,fp8,0,0.29893332719802856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,fp8,0,0.21244800090789795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,float16,0,0.25043733914693195
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,float16,0,0.1318773329257965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,fp8,0,0.12397866447766621
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,float16,0,0.012272000312805176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,fp8,0,0.12995733817418417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,fp8,0,0.1441333293914795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,float16,0,0.15295466780662537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,fp8,0,0.15252799789110819
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,float16,0,0.1030506690343221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,float16,0,0.06966933111349742
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,fp8,0,0.06752533217271169
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,fp8,0,0.06963733335336049
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,float16,0,0.07646933197975159
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,fp8,0,0.2492213249206543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,float16,0,0.15377066532770792
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,float16,0,0.07716266810894012
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,fp8,0,0.07827199995517731
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,float16,0,0.04002666721741358
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,fp8,0,0.11220266421635945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,fp8,0,0.05808533231417338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,fp8,0,0.039621333281199135
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,float16,0,0.0436106671889623
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,float16,0,0.13425599535306296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,fp8,0,0.043765331308046974
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,float16,0,0.044112001856168113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,fp8,0,0.043706665436426796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,fp8,0,0.037685332198937736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,fp8,0,0.03522666543722153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,float16,0,0.030293333033720653
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,float16,0,0.025370667378107708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,float16,0,0.07250133156776428
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,fp8,0,0.025311999022960663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,float16,0,0.025407999753952026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,fp8,0,0.027461332579453785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,float16,0,0.027450665831565857
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,fp8,0,0.027493332823117573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,float16,0,0.019386666516462963
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,fp8,0,0.023290666441122692
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,float16,0,0.017674667139848072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,float16,0,0.017984000345071156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,float16,0,0.04165866722663244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,fp8,0,0.01882133384545644
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,float16,0,0.01907733331123988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,fp8,0,0.019248000035683315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,float16,0,0.019167999426523846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,fp8,0,0.017269333203633625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,float16,0,0.2082080046335856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,float16,0,0.014954666296641031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,fp8,0,0.019130667050679524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,float16,0,0.014933332800865173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,fp8,0,0.015285332997639975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,fp8,0,0.015130666395028433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,float16,0,0.015194666882356008
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,fp8,0,0.015295999745527903
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,float16,0,0.015018666783968607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,fp8,0,0.015178666760524115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,float16,0,0.01119999960064888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,float16,0,0.054272000988324486
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,float16,0,0.011226666470368704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,float16,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,fp8,0,0.011525332927703857
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,fp8,0,0.013845333208640417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,fp8,0,0.013162666310866674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,float16,0,0.013183999806642532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,float16,0,0.01314666618903478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,fp8,0,0.01522133375207583
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,fp8,0,0.025306666890780132
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,float16,0,0.027376001079877216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,float16,0,0.011653333902359009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,float16,0,0.011194666226704916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,float16,0,0.010960000256697336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,float16,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,fp8,0,0.011946666985750198
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,float16,0,0.23547732830047607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,fp8,0,0.21393599112828574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,fp8,0,0.21978133916854858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,float16,0,0.25203200181325275
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,fp8,0,0.23908267418543497
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,float16,0,0.2539253234863281
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,fp8,0,0.24605866273244223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,float16,0,0.16290133198102316
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,fp8,0,0.15683199961980185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,fp8,0,0.11374400059382121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,float16,0,0.12340799967447917
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,float16,0,0.13079466422398886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,float16,0,0.015354666858911514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,fp8,0,0.1223413348197937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,float16,0,0.12505066394805908
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,fp8,0,0.07680533329645793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,fp8,0,0.1239466667175293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,float16,0,0.06801066795984904
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,float16,0,0.09010666608810425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,fp8,0,0.11589866876602173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,fp8,0,0.06227200229962667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,float16,0,0.06900266806284587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,float16,0,0.07108266651630402
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,fp8,0,0.06650133430957794
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,float16,0,0.13179199894269308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,float16,0,0.07070399820804596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,float16,0,0.23904534180959067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,fp8,0,0.06731200218200684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,float16,0,0.03976533313592275
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,float16,0,0.04437866806983948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,fp8,0,0.08229866623878479
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,fp8,0,0.035530666510264076
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,float16,0,0.040250666439533234
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,fp8,0,0.037658666570981346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,float16,0,0.040031999349594116
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,fp8,0,0.06270933151245117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,float16,0,0.025445332129796345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,float16,0,0.027493332823117573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,fp8,0,0.023589332898457844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,float16,0,0.025418666501839954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,fp8,0,0.0234400009115537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,float16,0,0.03955200066169103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,fp8,0,0.023333333432674408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,float16,0,0.025248001019159954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,fp8,0,0.03770133356253306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,float16,0,0.019146667172511418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,fp8,0,0.019519999623298645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,fp8,0,0.017210666090250015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,float16,0,0.01722666621208191
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,fp8,0,0.04576533536116282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,fp8,0,0.01724799970785777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,float16,0,0.017386666188637417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,fp8,0,0.023845332364241283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,fp8,0,0.017263999829689663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,float16,0,0.01573333392540614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,float16,0,0.01798933371901512
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,float16,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,fp8,0,0.015168000012636185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,float16,0,0.02536533276240031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,float16,0,0.01488000030318896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,float16,0,0.015200000256299973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,float16,0,0.0174346665541331
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,float16,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,fp8,0,0.015114666273196539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,fp8,0,0.02739199995994568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,float16,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,float16,0,0.014949332922697067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,fp8,0,0.03586133321126302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,float16,0,0.011215999722480774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,fp8,0,0.012784000486135483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,float16,0,0.012965332716703415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,float16,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,fp8,0,0.01119999960064888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,fp8,0,0.01126933346192042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,float16,0,0.011237333218256632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,fp8,0,0.015173333386580149
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,float16,0,0.01126933346192042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,1,128,1,float16,float16,0,0.015024000157912573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,1,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,float16,0,0.013503999759753546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,float16,0,0.01101333275437355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,2,128,1,float16,float16,0,0.022357332209746044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,2,128,1,float16,fp8,0,0.019253333409627277
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,fp8,0,0.011429333438475927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,0,0.019333332777023315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,0,0.029487999776999157
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,fp8,0,0.05392533540725708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,fp8,0,0.012063999970753988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,1,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,2,128,1,float16,float16,0,0.015194666882356008
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,2,128,1,float16,fp8,0,0.014325333138306936
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,0,0.016895999511082966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,0,0.02515733242034912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,0,0.015184000134468079
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,0,0.01922133316596349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,0,0.017978666971127193
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,1,128,1,float16,float16,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,fp8,0,0.03146666785081228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,float16,0,0.07097599903742473
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,2,128,1,float16,float16,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,2,128,1,float16,fp8,0,0.011887999872366587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,0,0.013002666334311167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,0,0.015125333021084467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,float16,0,0.023365333676338196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,0,0.025594666600227356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,fp8,0,0.02232533444960912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,1,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,1,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,2,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,0,0.01090666651725769
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,float16,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,0,0.01119999960064888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,1,128,1,float16,float16,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,1,128,1,float16,float16,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,1,128,1,float16,float16,0,0.011157333850860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,2,128,1,float16,float16,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,2,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,0,0.010351999973257383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,0,0.009285333255926767
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,float16,0,0.013264000415802002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,float16,0,0.017221332838137943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,float16,0,0.03941333293914795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,0,0.009359999870260557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,2,128,1,float16,float16,0,0.010928000013033548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,1,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,0,0.009237333511312803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,0,0.010149333626031876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,0,0.009317333499590555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,float16,0,0.012597333639860153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,1,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,1,128,1,float16,float16,0,0.01091733326514562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,1,128,1,float16,fp8,0,0.01108266661564509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,2,128,1,float16,float16,0,0.010928000013033548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,2,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,2,128,1,float16,fp8,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,1,128,1,float16,float16,0,0.010079999764760336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,1,128,1,float16,fp8,0,0.009663999701539675
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,2,128,1,float16,float16,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,1,128,1,float16,float16,0,0.01090666651725769
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,0,0.009008000294367472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,0,0.009216000015536943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,0,0.009018666421373686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,0,0.00891733355820179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,float16,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,fp8,0,0.011391999820868174
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,1,128,1,float16,float16,0,0.011034666250149408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,2,128,1,float16,float16,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,2,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,0,0.009888000165422758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,2,128,1,float16,fp8,0,0.010821333775917688
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,0,0.00922133338948091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,0,0.009109333157539368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,float16,0,0.010863999525705973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,1,128,1,float16,fp8,0,0.010847999403874079
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,2,128,1,float16,float16,0,0.010847999403874079
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,0,0.009279999881982803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,fp8,0,11.269279479980469
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,float16,0,15.477413177490234
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,float16,0,15.413552602132162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,fp8,0,11.693941752115885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,float16,0,16.10768000284831
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,fp8,0,11.823871612548828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,float16,0,8.237797419230143
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,fp8,0,6.6896317799886065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,float16,0,7.4418080647786455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,float16,0,16.029978434244793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,fp8,0,5.604650497436523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,fp8,0,5.689594904581706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,float16,0,7.65338134765625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,fp8,0,5.903573354085286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,fp8,0,11.472943623860678
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,float16,0,3.936885197957357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,fp8,0,5.995775858561198
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,fp8,0,3.398245175679525
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,fp8,0,2.84771728515625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,float16,0,3.6479199727376304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,fp8,0,3.028239885965983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,float16,0,3.6762720743815103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,float16,0,7.615631739298503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,float16,0,3.4755786259969077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,float16,0,2.04311466217041
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,fp8,0,1.9174985885620117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,float16,0,1.803269386291504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,fp8,0,1.4697920481363933
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,fp8,0,1.5264320373535156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,float16,0,1.8981493314107258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,fp8,0,1.5958506266276042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,float16,0,1.8814133008321126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,float16,0,3.8206774393717446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,fp8,0,1.5886720021565754
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,float16,0,7.957312266031901
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,fp8,0,3.0489012400309243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,float16,0,8.781946818033854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,fp8,0,6.38160514831543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,fp8,0,6.567141215006511
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,fp8,0,3.229514757792155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,float16,0,8.870016098022461
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,float16,0,1.8541547457377117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,float16,0,9.136149088541666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,float16,0,4.752058664957683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,fp8,0,7.0133921305338545
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,fp8,0,4.216901461283366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,float16,0,9.122144063313803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,float16,0,4.050848007202148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,fp8,0,3.6788107554117837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,fp8,0,3.3218294779459634
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,float16,0,4.152271906534831
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,fp8,0,3.453242619832357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,float16,0,4.291338602701823
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,fp8,0,3.6769227981567383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,fp8,0,7.029882431030273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,fp8,0,2.077631950378418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,float16,0,2.0986293156941733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,fp8,0,1.7112266222635906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,fp8,0,1.8090720176696777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,float16,0,2.22326930363973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,float16,0,1.2348480224609375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,float16,0,2.3436692555745444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,fp8,0,1.6554667154947917
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,float16,0,1.0802559852600098
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,float16,0,2.0697760581970215
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,fp8,0,0.8734933535257975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,float16,0,1.0713173548380535
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,float16,0,2.1571146647135415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,fp8,0,0.9584960142771403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,fp8,0,1.7991199493408203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,float16,0,1.1255199909210205
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,fp8,0,0.9544426600138346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,float16,0,4.3259627024332685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,fp8,0,1.1909066836039226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,fp8,0,4.484853426615397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,fp8,0,0.8900907039642334
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,float16,0,6.162848154703776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,fp8,0,4.574165344238281
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,float16,0,6.213493347167969
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,float16,0,6.322101593017578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,float16,0,2.8218345642089844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,fp8,0,3.013344128926595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,fp8,0,4.985973358154297
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,fp8,0,2.521392027537028
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,float16,0,3.3674186070760093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,float16,0,1.1262293656667073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,float16,0,2.9065065383911133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,fp8,0,2.591989358266195
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,fp8,0,4.953957239786784
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,float16,0,3.043429374694824
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,fp8,0,2.5413173039754233
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,float16,0,1.7353973388671875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,fp8,0,1.2928373018900554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,float16,0,1.465775966644287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,fp8,0,1.236570676167806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,fp8,0,2.3357866605122886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,float16,0,1.5651520093282063
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,fp8,0,1.3910133043924968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,float16,0,3.128634770711263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,float16,0,1.55732266108195
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,fp8,0,1.315077304840088
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,float16,0,0.8855733076731364
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,fp8,0,0.8261120319366455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,float16,0,0.7633333206176758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,float16,0,1.4923572540283203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,fp8,0,0.6505493323008219
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,float16,0,0.8135039806365967
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,float16,0,0.836085319519043
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,fp8,0,0.714080015818278
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,fp8,0,0.6387679974238077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,float16,0,6.4511362711588545
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,fp8,0,6.040853500366211
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,float16,0,8.069717407226562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,float16,0,8.301104227701822
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,fp8,0,6.084933598836263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,float16,0,0.7704586982727051
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,fp8,0,1.5410079956054688
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,fp8,0,0.6967840194702148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,float16,0,8.497983932495117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,float16,0,8.603578567504883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,fp8,0,6.756677627563477
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,float16,0,4.512821197509766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,fp8,0,2.954671859741211
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,float16,0,3.6925973892211914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,float16,0,3.7839574813842773
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,fp8,0,6.787712097167969
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,float16,0,4.098496119181315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,float16,0,4.074384053548177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,fp8,0,3.596154530843099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,float16,0,2.3268213272094727
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,fp8,0,4.088666598002116
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,float16,0,1.8717439969380696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,fp8,0,1.7498559951782227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,float16,0,1.907317320505778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,fp8,0,1.566970666249593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,float16,0,2.070890744527181
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,fp8,0,1.7756053606669109
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,fp8,0,3.2476800282796225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,fp8,0,1.7243839899698894
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,fp8,0,3.4580214818318686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,float16,0,0.956933339436849
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,fp8,0,1.0811359882354736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,float16,0,0.9738986492156982
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,fp8,0,0.8153759638468424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,float16,0,1.0417866706848145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,fp8,0,0.921295960744222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,float16,0,1.0457546710968018
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,float16,0,0.6109866698582967
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,float16,0,2.058549404144287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,float16,0,0.5091360012690226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,float16,0,0.5146613518397013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,fp8,0,2.0885547002156577
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,float16,0,0.5484853188196818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,fp8,0,0.49453334013621014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,fp8,0,0.8978506724039713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,fp8,0,0.49807465076446533
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,fp8,0,0.5764266649881998
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,fp8,0,0.43507198492685956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,float16,0,1.1824373404184978
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,float16,0,4.218005180358887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,fp8,0,3.445919990539551
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,float16,0,4.367061297098796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,fp8,0,3.5686559677124023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,float16,0,4.916026751200358
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,fp8,0,4.207344055175781
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,fp8,0,0.443994681040446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,float16,0,2.7778186798095703
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,float16,0,0.5569493373235067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,float16,0,2.1343679428100586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,fp8,0,2.629690647125244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,float16,0,2.200058619181315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,fp8,0,1.816138744354248
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,float16,0,2.4178345998128257
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,fp8,0,2.0077385902404785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,fp8,0,4.354458808898926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,fp8,0,2.045226732889811
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,float16,0,1.3956212997436523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,float16,0,1.094000021616618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,fp8,0,0.9057706991831461
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,float16,0,1.1153866449991863
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,fp8,0,0.9349119663238525
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,float16,0,1.2322719891866047
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,fp8,0,0.791424036026001
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,float16,0,1.2463253339131672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,fp8,0,1.0943199793497722
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,float16,0,2.418458620707194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,float16,0,0.7294613520304362
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,float16,0,0.5654933452606201
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,fp8,0,0.48498133818308514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,fp8,0,1.3274400234222412
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,float16,0,0.5867466529210409
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,fp8,0,0.49971731503804523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,fp8,0,1.747968037923177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,float16,0,0.6363733212153116
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,fp8,0,0.5631946722666422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,fp8,0,0.5620106856028239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,fp8,0,1.0556373596191406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,float16,0,4.96344534556071
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,float16,0,0.3102773427963257
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,fp8,0,0.37829867998758954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,float16,0,0.3139413396517436
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,fp8,0,0.2767626643180847
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,float16,0,0.3429439862569173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,fp8,0,0.3024746576944987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,float16,0,0.34379200140635174
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,fp8,0,0.3113386631011963
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,fp8,0,0.6921172936757406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,float16,0,0.6484213272730509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,fp8,0,3.2881174087524414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,float16,0,4.097327868143718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,fp8,0,3.461423873901367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,float16,0,4.121829350789388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,float16,0,4.898858706156413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,fp8,0,4.124736150105794
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,fp8,0,2.8693065643310547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,float16,0,0.3893119891484578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,float16,0,2.024277369181315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,float16,0,4.727141380310059
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,float16,0,2.073871930440267
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,fp8,0,1.751215934753418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,float16,0,2.411626656850179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,fp8,0,2.3740533192952475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,float16,0,2.4289867083231607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,fp8,0,2.070693333943685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,fp8,0,1.6675306955973308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,float16,0,1.0192853609720867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,fp8,0,1.4262986183166504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,fp8,0,4.05567995707194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,float16,0,1.0794400374094646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,fp8,0,0.9018719991048177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,float16,0,1.1981279850006104
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,fp8,0,1.0913866360982258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,float16,0,1.204799969991048
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,fp8,0,1.1051572958628337
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,float16,0,0.7323040167490641
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,fp8,0,0.729856014251709
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,float16,0,2.9176905949910483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,fp8,0,0.27063467105229694
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,fp8,0,0.8548853397369385
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,float16,0,0.6224853197733561
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,fp8,0,0.4716426531473796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,fp8,0,0.5780746539433798
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,fp8,0,0.5652533372243246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,float16,0,0.38537601629892987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,fp8,0,0.389631986618042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,float16,0,0.5211093425750732
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,fp8,0,0.24980799357096353
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,float16,0,0.2908533414204915
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,fp8,0,0.45047998428344727
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,fp8,0,0.2590506672859192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,float16,0,0.3269226749738057
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,float16,0,0.33378668626149494
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,fp8,0,0.3133973280588786
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,float16,0,0.6148960192998251
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,float16,0,0.162800004084905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,fp8,0,0.2171306610107422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,float16,0,0.16513066490491232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,fp8,0,0.15161066253980002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,float16,0,0.2863093415896098
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,fp8,0,0.1660319964090983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,float16,0,1.4393973350524902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,float16,0,0.5477013190587362
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,fp8,0,0.16739199558893839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,fp8,0,0.2985493342081706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,float16,0,2.3868746757507324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,float16,0,2.5013440450032554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,fp8,0,0.14549866318702698
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,fp8,0,2.1103893915812173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,float16,0,0.17717333634694418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,float16,0,0.18203200896581015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,fp8,0,2.7252108256022134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,float16,0,2.931152025858561
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,float16,0,1.8683786392211914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,float16,0,1.215674638748169
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,float16,0,0.22402666012446085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,fp8,0,1.108944018681844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,fp8,0,1.8618933359781902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,fp8,0,1.0802026589711506
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,fp8,0,1.4230559666951497
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,float16,0,1.5025866826375325
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,float16,0,1.4537547429402669
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,fp8,0,1.3124852975209553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,float16,0,2.98358949025472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,fp8,0,2.571930726369222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,fp8,0,0.9471786816914877
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,float16,0,0.9227093060811361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,float16,0,0.6369066635767618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,fp8,0,0.556384007136027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,fp8,0,1.9895946184794109
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,float16,0,0.7544693152109782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,float16,0,1.297712008158366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,float16,0,0.47789867719014484
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,fp8,0,0.49132800102233887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,fp8,0,0.28437334299087524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,float16,0,0.6160533428192139
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,fp8,0,0.5311359961827596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,fp8,0,0.2988213300704956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,float16,0,0.39284801483154297
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,fp8,0,0.3747040033340454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,float16,0,0.3927733500798543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,fp8,0,0.3681066830952962
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,fp8,0,0.6947466532389323
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,float16,0,0.17922665675481161
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,fp8,0,0.2643679976463318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,float16,0,0.1853760083516439
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,fp8,0,0.16430399815241495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,float16,0,0.21574934323628744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,float16,0,0.3387519915898641
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,float16,0,0.2173653244972229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,fp8,0,0.20163200298945108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,fp8,0,0.14961600303649902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,float16,0,0.15810666481653848
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,fp8,0,0.6748639742533366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,float16,0,0.10894933342933655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,fp8,0,0.15733866890271506
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,fp8,0,0.10100799798965454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,float16,0,0.11711999773979187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,fp8,0,0.11203733086585999
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,float16,0,0.11910399794578552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,fp8,0,0.11359467109044392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,float16,0,0.767632007598877
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,float16,0,2.341887950897217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,fp8,0,0.09693866968154907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,fp8,0,0.18983999888102213
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,fp8,0,2.169930617014567
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,float16,0,2.487562656402588
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,float16,0,3.081258773803711
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,float16,0,0.2628106673558553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,fp8,0,2.8471412658691406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,float16,0,0.10904533664385478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,fp8,0,2.7885545094807944
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,fp8,0,1.0109546979268391
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,fp8,0,2.1358720461527505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,float16,0,1.2444213231404622
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,fp8,0,1.0912480354309082
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,float16,0,1.5696053504943848
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,float16,0,2.027066707611084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,fp8,0,1.3904906908671062
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,float16,0,1.5931040445963542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,fp8,0,1.4046026865641277
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,float16,0,0.6103893518447876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,fp8,0,1.0808693567911785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,fp8,0,0.5271733204523722
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,float16,0,0.32676267623901367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,fp8,0,0.5672479867935181
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,float16,0,3.1345065434773765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,float16,0,0.7886506716410319
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,float16,0,0.7734560171763102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,fp8,0,0.7707520325978597
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,float16,0,0.5139893293380737
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,float16,0,1.013925313949585
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,float16,0,0.30987733602523804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,fp8,0,0.5535573164621989
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,fp8,0,0.2961706717809041
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,fp8,0,1.9885387420654297
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,float16,0,0.41065065066019696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,fp8,0,0.4036480188369751
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,float16,0,1.205456018447876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,float16,0,0.4055519898732503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,float16,0,0.17446933190027872
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,float16,0,0.27983466784159344
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,fp8,0,0.15355733036994934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,fp8,0,0.29181865851084393
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,fp8,0,0.16516266266504923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,float16,0,0.18316799402236938
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,fp8,0,0.2802079916000366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,float16,0,0.6440586646397909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,float16,0,0.3298826615015666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,fp8,0,0.21673067410786948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,fp8,0,0.7314773400624593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,fp8,0,0.16145066420237222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,float16,0,0.09941866993904114
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,float16,0,0.1027893324693044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,float16,0,0.11450133721033733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,fp8,0,0.10985599954922994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,fp8,0,0.20435200134913126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,float16,0,0.22041600942611694
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,fp8,0,0.3874719937642415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,fp8,0,0.11230933666229248
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,fp8,0,0.09495466947555542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,fp8,0,0.08550399541854858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,fp8,0,0.05763733386993408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,float16,0,0.06446399788061778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,float16,0,0.21414933602015176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,fp8,0,0.06708799799283345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,float16,0,0.1181813379128774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,float16,0,0.07007466753323872
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,fp8,0,0.06811733543872833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,float16,0,0.09331199526786804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,fp8,0,0.08943466345469157
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,fp8,0,0.06020799775918325
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,float16,0,1.553760051727295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,float16,0,0.07041066884994507
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,fp8,0,1.3843894004821777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,float16,0,2.0062559445699057
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,fp8,0,1.8291840553283691
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,float16,0,0.15069333712259927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,float16,0,1.9523520469665527
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,float16,0,0.06285333136717479
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,float16,0,0.7413547039031982
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,float16,0,1.352394739786784
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,fp8,0,1.4820747375488281
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,fp8,0,0.7075200080871582
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,float16,0,1.0134399731953938
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,float16,0,1.02182936668396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,fp8,0,0.9329280058542887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,float16,0,1.4770347277323406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,float16,0,0.6861759821573893
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,float16,0,0.37328533331553143
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,fp8,0,0.7526240348815918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,fp8,0,0.3366026480992635
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,fp8,0,0.3687146504720052
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,fp8,0,1.0218933423360188
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,fp8,0,0.4821653366088867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,float16,0,0.5312586625417074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,fp8,0,1.2573440074920654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,float16,0,0.3545706669489543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,fp8,0,0.6425120035807291
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,fp8,0,0.38843198617299396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,float16,0,0.20392000675201416
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,fp8,0,0.18181333939234415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,float16,0,0.21683200200398764
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,float16,0,0.5152426560719808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,float16,0,0.269813338915507
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,fp8,0,0.26869332790374756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,float16,0,0.2639999985694885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,fp8,0,1.860165278116862
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,float16,0,0.20177600781122842
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,float16,0,0.11302399635314941
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,fp8,0,0.10180266698201497
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,float16,0,0.7729439735412598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,float16,0,0.11855467160542806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,float16,0,0.3975893259048462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,fp8,0,0.10942932963371277
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,fp8,0,0.13346133629480997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,fp8,0,0.19895466168721518
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,fp8,0,0.1427733302116394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,fp8,0,0.262773334980011
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,float16,0,0.10753066341082256
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,fp8,0,0.11387200156847636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,fp8,0,0.20666666825612387
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,fp8,0,0.062496001521746315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,float16,0,0.07042133311430614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,fp8,0,0.06571733454863231
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,float16,0,0.07747200131416321
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,fp8,0,0.07668800155321757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,fp8,0,0.07784533500671387
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,float16,0,0.15272000432014465
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,float16,0,0.0582826683918635
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,float16,0,0.14962133765220642
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,fp8,0,0.06217599908510844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,fp8,0,0.04247466723124186
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,fp8,0,0.5054613351821899
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,float16,0,0.04994133114814758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,fp8,0,0.0498986691236496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,fp8,0,0.049786667029062905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,float16,0,0.07810666660467784
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,float16,0,1.5057652791341145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,fp8,0,1.336143970489502
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,float16,0,0.0469813346862793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,float16,0,0.06678399940331776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,fp8,0,0.04362666606903076
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,float16,0,0.04993066688378652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,fp8,0,1.5145386060078938
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,float16,0,2.374330679575602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,float16,0,2.193120002746582
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,float16,0,0.04588800172011057
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,float16,0,1.6023359298706055
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,float16,0,0.7793333530426025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,float16,0,1.6820425987243652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,fp8,0,0.6827733516693115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,float16,0,0.81822403271993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,fp8,0,0.7674667040506998
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,float16,0,1.146783987681071
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,fp8,0,1.1492586930592854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,float16,0,1.1760746637980144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,fp8,0,1.1458186308542888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,fp8,0,2.097391923268636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,fp8,0,0.3519359827041626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,float16,0,0.8029226462046305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,fp8,0,0.9086293379465739
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,fp8,0,1.7939200401306152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,fp8,0,0.401583989461263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,float16,0,0.5642026662826538
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,float16,0,0.562661329905192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,fp8,0,0.5542826652526855
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,float16,0,0.20896534125010172
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,fp8,0,0.18918933471043906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,fp8,0,0.463861346244812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,fp8,0,2.108213265736898
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,fp8,0,0.20822399854660034
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,float16,0,0.4291786750157674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,float16,0,0.2890560030937195
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,fp8,0,0.28818132479985553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,fp8,0,0.5872799952824911
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,float16,0,0.22766399383544922
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,fp8,0,0.24230400721232095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,float16,0,0.40807998180389404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,float16,0,0.39586134751637775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,float16,0,0.12356799840927124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,fp8,0,0.11819199721018474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,float16,0,0.15718400478363037
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,fp8,0,0.1606613298257192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,float16,0,0.16563733418782553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,float16,0,0.13354133566220602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,fp8,0,0.13146666685740152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,fp8,0,0.060271998246510826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,float16,0,0.06855999926726024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,fp8,0,0.06570666531721751
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,fp8,0,0.15430933237075806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,float16,0,0.08066666622956593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,float16,0,0.08533866206804912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,fp8,0,0.08240533371766408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,float16,0,0.06468266745408376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,float16,0,0.07275199890136719
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,float16,0,0.042410666743914284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,fp8,0,0.06669333577156067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,fp8,0,0.03961066653331121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,fp8,0,0.041759997606277466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,float16,0,0.04897599915663401
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,float16,0,0.1151093343893687
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,fp8,0,0.04821333289146423
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,fp8,0,0.05007466673851013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,float16,0,0.0340639998515447
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,fp8,0,0.08055999875068665
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,fp8,0,0.03930133332808813
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,float16,0,0.028143999477227528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,fp8,0,0.025370667378107708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,fp8,0,0.02733866622050603
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,float16,0,0.030400000512599945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,fp8,0,0.030453334252039593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,float16,0,0.2992639938990275
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,float16,0,0.04329599936803182
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,fp8,0,0.031317333380381264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,float16,0,0.027637332677841187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,float16,0,0.22143999735514322
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,float16,0,1.111792008082072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,fp8,0,0.3123040000597636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,fp8,0,1.0060160160064697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,float16,0,1.2516427040100098
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,fp8,0,1.1762453715006511
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,float16,0,0.04995200037956238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,float16,0,1.9158132870992024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,fp8,0,1.6220213572184246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,float16,0,0.03147733211517334
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,float16,0,1.9218719800313313
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,float16,0,1.377797285715739
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,fp8,0,0.5170400142669678
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,fp8,0,1.6185760498046875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,fp8,0,0.10472533106803894
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,fp8,0,0.59443199634552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,fp8,0,0.8908960024515787
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,float16,0,0.6388426621754965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,float16,0,0.692202647527059
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,float16,0,0.5552106698354086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,fp8,0,0.9851893583933512
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,fp8,0,0.26923199494679767
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,fp8,0,0.8184106349945068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,float16,0,0.9258560339609782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,fp8,0,0.3079413374265035
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,float16,0,0.4787253141403198
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,fp8,0,0.44436268011728924
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,float16,0,0.45546666781107586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,fp8,0,0.47362132867177326
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,float16,0,0.3545759916305542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,float16,0,0.9687200387318929
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,fp8,0,0.4169866641362508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,float16,0,0.17124799887339273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,float16,0,0.32363732655843097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,float16,0,0.15412799517313638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,fp8,0,0.2688960035641988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,float16,0,0.25621867179870605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,fp8,0,0.14548800388971964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,fp8,0,0.1641386648019155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,float16,0,0.0884320040543874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,fp8,0,0.24077866474787393
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,float16,0,0.0953493316968282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,float16,0,0.28702400128046673
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,fp8,0,0.13209066788355509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,fp8,0,0.09099200367927551
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,float16,0,0.1338986655076345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,float16,0,0.23836799462636313
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,float16,0,0.18714133898417154
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,float16,0,0.10139733552932739
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,fp8,0,0.07863466441631317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,float16,0,0.04920533299446106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,float16,0,0.052005335688591
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,fp8,0,0.051642666260401406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,float16,0,0.06409599880377452
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,fp8,0,0.13050666451454163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,float16,0,0.06912533442179362
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,fp8,0,0.2182719906171163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,fp8,0,0.11664533615112305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,float16,0,0.03146133323510488
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,fp8,0,0.058149332801500954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,float16,0,0.13075199723243713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,float16,0,0.03349866718053818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,fp8,0,0.06632000207901001
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,fp8,0,0.031514666974544525
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,fp8,0,0.039642666776975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,float16,0,0.039642666776975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,fp8,0,0.06846400101979573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,fp8,0,0.039893334110577904
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,float16,0,0.019152000546455383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,fp8,0,1.759050687154134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,fp8,0,0.033520000676314034
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,fp8,0,0.021274665991465252
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,fp8,0,0.04609066744645437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,fp8,0,0.029391999046007793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,float16,0,0.023605334262053173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,float16,0,0.021312000850836437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,float16,0,0.02399466683467229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,float16,0,0.03793066740036011
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,fp8,0,0.025407999753952026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,float16,0,0.01844800015290578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,fp8,0,0.017792000124851864
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,float16,0,0.018863999595244724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,fp8,0,0.02555199960867564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,fp8,0,0.01817600056529045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,float16,0,0.027514666318893433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,fp8,0,0.020581333587567013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,fp8,0,0.021333334346612293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,float16,0,0.0205226664741834
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,fp8,0,0.025349333882331848
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,fp8,0,0.42340266704559326
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,float16,0,0.02165866643190384
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,float16,0,0.447653333346049
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,float16,0,0.5121333201726278
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,float16,0,0.01916266605257988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,fp8,0,0.5060746669769287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,fp8,0,0.019215999792019527
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,float16,0,0.8172319730122884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,fp8,0,0.8807893594106039
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,float16,0,0.23120532433191934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,float16,0,0.6365280151367188
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,fp8,0,0.7701333363850912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,float16,0,0.8617813587188721
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,fp8,0,0.22031466166178384
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,fp8,0,0.8473760286966959
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,float16,0,0.42214401563008624
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,float16,0,0.056261335810025535
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,float16,0,0.42980798085530597
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,fp8,0,0.4033120075861613
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,fp8,0,0.39156798521677655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,float16,0,0.3269919951756795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,float16,0,0.15017599860827127
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,float16,0,0.2709546685218811
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,fp8,0,0.14687466621398926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,fp8,0,0.2688639958699544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,fp8,0,0.24133867025375366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,float16,0,0.2174560030301412
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,fp8,0,0.2153173287709554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,float16,0,0.07130133112271626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,fp8,0,0.11867733796437581
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,fp8,0,0.06412800153096516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,float16,0,0.07852800190448761
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,fp8,0,0.07452799876530965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,fp8,0,0.12255466977755229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,float16,0,0.1204853355884552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,float16,0,0.2262079914410909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,fp8,0,0.1251466671625773
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,float16,0,0.03986666599909464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,float16,0,0.0950933297475179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,float16,0,0.1257866621017456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,float16,0,0.04386133452256521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,fp8,0,0.04385599990685781
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,float16,0,0.055455997586250305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,float16,0,0.11262399951616923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,fp8,0,0.058335999647776283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,fp8,0,0.061706667145093284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,float16,0,0.04866666595141093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,float16,0,0.025402667621771496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,fp8,0,0.037952000896135964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,fp8,0,0.20256533225377402
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,float16,0,0.027029333015282948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,fp8,0,0.02532266577084859
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,float16,0,0.031498665610949196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,float16,0,0.05876799921194712
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,fp8,0,0.03422933320204417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,fp8,0,0.05188799897829691
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,float16,0,0.023269332945346832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,fp8,0,0.029498666524887085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,fp8,0,0.023333333432674408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,float16,0,0.01717866708834966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,fp8,0,0.017573333034912746
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,fp8,0,0.03348266581694285
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,float16,0,0.02072000006834666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,fp8,0,0.021269333859284718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,float16,0,0.032069332897663116
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,float16,0,0.021151999632517498
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,fp8,0,0.02327999969323476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,float16,0,0.015386667102575302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,fp8,0,0.11009066303571065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,float16,0,0.016629333297411602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,float16,0,0.015583999454975128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,fp8,0,0.015392000476519266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,float16,0,0.01704000060757001
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,float16,0,0.017610666652520496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,fp8,0,0.01846933364868164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,float16,0,0.01639466608564059
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,fp8,0,0.023285334308942158
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,float16,0,0.1710240046183268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,fp8,0,0.015130666395028433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,float16,0,0.015173333386580149
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,fp8,0,0.015135999768972397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,float16,0,0.01515199989080429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,fp8,0,0.015130666395028433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,float16,0,0.015157333264748255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,float16,0,0.2707039912541707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,fp8,0,0.015168000012636185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,float16,0,0.015205333630243937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,float16,0,0.31332266330718994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,float16,0,0.4526666800181071
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,fp8,0,0.4116106828053792
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,float16,0,0.43617065747578937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,fp8,0,0.27028266588846844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,float16,0,0.3439093430836995
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,fp8,0,0.307861328125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,float16,0,0.14644267161687216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,fp8,0,0.41327468554178876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,fp8,0,0.4816906849543254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,float16,0,0.23618666330973306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,fp8,0,0.16248533129692078
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,float16,0,0.1628266672293345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,fp8,0,0.46849600474039715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,fp8,0,0.2380160093307495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,fp8,0,0.24978667497634888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,float16,0,0.23974400758743286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,float16,0,0.19351466496785483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,float16,0,0.0812799980243047
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,fp8,0,0.14552533626556396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,fp8,0,0.21381332476933798
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,float16,0,0.0890826682249705
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,fp8,0,0.08782399694124858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,fp8,0,0.13060800234476724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,fp8,0,0.12689600388209024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,float16,0,0.09531199932098389
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,float16,0,0.044106667240460716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,fp8,0,0.04161066561937332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,float16,0,0.04573333263397217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,fp8,0,0.04688533147176107
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,fp8,0,0.07678399980068207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,fp8,0,0.06237866481145223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,float16,0,0.06620266536871593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,fp8,0,0.06412266691525777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,float16,0,0.019194666296243668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,fp8,0,0.1125920017560323
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,float16,0,0.02754133443037669
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,fp8,0,0.05624000231424967
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,fp8,0,0.029365333418051403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,float16,0,0.03494933247566223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,fp8,0,0.036517334481080375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,float16,0,0.03494933247566223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,fp8,0,0.03761066744724909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,fp8,0,0.03136000037193298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,float16,0,0.025381334125995636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,float16,0,0.017429333180189133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,fp8,0,0.027327999472618103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,float16,0,0.019066666563351948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,float16,0,0.029071999092896778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,float16,0,0.02128000060717265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,fp8,0,0.023103999594847362
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,fp8,0,0.023472001155217487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,float16,0,0.017231999586025875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,float16,0,0.05885333319505056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,fp8,0,0.01735466718673706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,float16,0,0.012928000340859095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,float16,0,0.01312000056107839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,float16,0,0.021136000752449036
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,float16,0,0.01505600040157636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,float16,0,0.015077333897352219
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,fp8,0,0.015184000134468079
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,float16,0,0.1218773325284322
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,fp8,0,0.021338666478792827
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,float16,0,0.01333333303531011
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,float16,0,0.013199999928474426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,fp8,0,0.013178666432698568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,fp8,0,0.015103999525308609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,fp8,0,0.015301333119471868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,fp8,0,0.013338666409254074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,float16,0,0.012981332838535309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,fp8,0,0.014256000518798828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,float16,0,0.01221866657336553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,float16,0,0.012437333663304647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,float16,0,0.013104000439246496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,float16,0,0.013141332815090815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,float16,0,0.012949333836634954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,float16,0,0.12484799822171529
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,float16,0,0.22830400864283243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,fp8,0,0.21747199694315592
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,float16,0,0.011194666226704916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,float16,0,0.24297600984573364
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,fp8,0,0.23796266317367554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,float16,0,0.31675199667612713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,fp8,0,0.3147253394126892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,float16,0,0.05807999769846598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,fp8,0,0.3139306704203288
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,float16,0,0.12426666418711345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,fp8,0,0.11846933762232463
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,float16,0,0.013082666943470636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,float16,0,0.12970667084058127
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,float16,0,0.16487466295560202
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,fp8,0,0.1664906640847524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,float16,0,0.3164106607437134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,float16,0,0.16294933358828226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,float16,0,0.21980265776316324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,float16,0,0.0656160016854604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,fp8,0,0.1320373316605886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,float16,0,0.06845866640408833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,fp8,0,0.1276479959487915
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,fp8,0,0.06189866860707601
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,fp8,0,0.06674133241176605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,fp8,0,0.0824480007092158
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,float16,0,0.0864533285299937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,fp8,0,0.08408000071843465
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,float16,0,0.037802666425704956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,fp8,0,0.0625493327776591
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,fp8,0,0.2507306734720866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,float16,0,0.1269546647866567
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,fp8,0,0.035818666219711304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,float16,0,0.04403733213742574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,fp8,0,0.03769599894682566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,float16,0,0.08497066299120586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,fp8,0,0.04582933088143667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,float16,0,0.04513066510359446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,fp8,0,0.04576533536116282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,fp8,0,0.023215999205907185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,fp8,0,0.03764266769091288
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,float16,0,0.03164800008138021
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,float16,0,0.024826665719350178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,fp8,0,0.17483733097712198
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,float16,0,0.027456000447273254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,fp8,0,0.0273333340883255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,float16,0,0.02743999908367793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,float16,0,0.019402666638294857
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,float16,0,0.016293333222468693
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,float16,0,0.01609066625436147
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,fp8,0,0.016458666572968166
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,fp8,0,0.016197333733240765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,float16,0,0.0662720004717509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,fp8,0,0.017562666287024815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,fp8,0,0.02526933451493581
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,fp8,0,0.019007999449968338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,fp8,0,0.029578665892283123
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,float16,0,0.013359999905029932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,fp8,0,0.023520000278949738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,float16,0,0.012906666845083237
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,fp8,0,0.013157332936922709
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,float16,0,0.023557332654794056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,float16,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,float16,0,0.017386666188637417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,float16,0,0.013093333691358566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,float16,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,fp8,0,0.015599999576807022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,float16,0,0.011109333485364914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,fp8,0,0.01249066616098086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,float16,0,0.01108266661564509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,float16,0,0.01740266631046931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,float16,0,0.011407999942700068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,fp8,0,0.011472000430027643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,float16,0,0.037952000896135964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,float16,0,0.01116266722480456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,float16,0,0.01303999995191892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,float16,0,0.011157333850860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,float16,0,0.011146667102972666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,fp8,0,0.012666666259368261
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,float16,0,0.012991999586423239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,float16,0,0.012981332838535309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,float16,0,0.19695999224980673
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,fp8,0,0.19113065799077353
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,float16,0,0.20614399512608847
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,fp8,0,0.01190399999419848
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,fp8,0,0.011733333269755045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,float16,0,0.24411199490229288
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,fp8,0,0.23973333835601807
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,float16,0,0.24549333254496256
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,float16,0,0.16339733203252158
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,fp8,0,0.16891199350357056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,float16,0,0.10946666200955708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,fp8,0,0.10478933652242024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,fp8,0,0.19963733355204263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,fp8,0,0.11914666493733723
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,float16,0,0.12804800271987915
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,float16,0,0.011312000453472137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,fp8,0,0.12602133552233377
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,float16,0,0.0906880001227061
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,fp8,0,0.08521599570910136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,fp8,0,0.0540533314148585
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,float16,0,0.06018133461475372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,float16,0,0.06555200119813283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,fp8,0,0.06378666559855144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,float16,0,0.06563733518123627
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,fp8,0,0.06427733103434245
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,fp8,0,0.2385973334312439
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,float16,0,0.04170133173465729
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,float16,0,0.05963733295599619
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,float16,0,0.035887998839219414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,fp8,0,0.031445334355036415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,fp8,0,0.05638933181762695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,float16,0,0.1291093329588572
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,fp8,0,0.03559466699759165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,float16,0,0.037978666524092354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,fp8,0,0.03724266588687897
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,float16,0,0.025589334468046825
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,fp8,0,0.04450666904449463
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,float16,0,0.10744532942771912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,fp8,0,0.021173333128293354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,float16,0,0.023344000180562336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,float16,0,0.03383466601371765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,fp8,0,0.021242665747801464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,float16,0,0.023205332458019257
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,float16,0,0.02348800003528595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,fp8,0,0.02349333216746648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,fp8,0,0.029477333029111225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,fp8,0,0.09815466403961182
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,float16,0,0.015285332997639975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,fp8,0,0.01924266666173935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,fp8,0,0.01515199989080429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,fp8,0,0.03384533276160558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,fp8,0,0.01526933287580808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,float16,0,0.015850666910409927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,fp8,0,0.015168000012636185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,float16,0,0.01587733378012975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,float16,0,0.018112000077962875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,float16,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,float16,0,0.021231998999913532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,fp8,0,0.01470400020480156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,float16,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,fp8,0,0.02313599983851115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,float16,0,0.01201066623131434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,float16,0,0.01303999995191892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,fp8,0,0.01595199977358182
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,float16,0,0.013088000317414602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,float16,0,0.011237333218256632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,float16,0,0.011594666788975397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,fp8,0,0.012015999605258306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,float16,0,0.011221333096424738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,float16,0,0.012154666086037954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,float16,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,fp8,0,0.011152000476916632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,fp8,0,0.011338666081428528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,float16,0,0.013162666310866674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,float16,0,0.010933333386977514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,float16,0,0.03772799919048945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,float16,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,float16,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,float16,0,0.18817599614461264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,float16,0,0.19449599583943686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,float16,0,0.015439999600251516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,fp8,0,0.17754133542378744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,float16,0,0.21160000562667847
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,fp8,0,0.1975626746813456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,float16,0,0.20886399348576865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,fp8,0,0.17315733432769775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,float16,0,0.13110400239626566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,fp8,0,0.12733333309491476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,fp8,0,0.09321600198745728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,float16,0,0.10379200180371602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,fp8,0,0.09426666299502055
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,float16,0,0.10802132884661357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,fp8,0,0.10147733489672343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,float16,0,0.10044800241788228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,fp8,0,0.10276266932487488
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,fp8,0,0.01108266661564509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,float16,0,0.07380266487598419
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,float16,0,0.05680533250172933
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,fp8,0,0.05087999999523163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,fp8,0,0.06458133459091187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,float16,0,0.05774400134881338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,float16,0,0.0605973352988561
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,fp8,0,0.05637866755326589
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,fp8,0,0.05609600245952606
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,float16,0,0.03667200108369192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,fp8,0,0.03782399992148081
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,float16,0,0.03356266766786575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,fp8,0,0.03138133386770884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,float16,0,0.03335466732581457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,fp8,0,0.05230933427810669
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,fp8,0,0.03146666785081228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,float16,0,0.034874667723973594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,float16,0,0.060047999024391174
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,float16,0,0.03583466758330663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,fp8,0,0.03245333333810171
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,fp8,0,0.20415467023849487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,float16,0,0.021536000072956085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,fp8,0,0.021146667500336964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,float16,0,0.021546666820844013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,fp8,0,0.02128000060717265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,fp8,0,0.021295999487241108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,fp8,0,0.03256533294916153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,fp8,0,0.011461333682139715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,fp8,0,0.021231998999913532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,float16,0,0.017045332739750545
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,fp8,0,0.01724799970785777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,float16,0,0.01504533365368843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,float16,0,0.015253332753976187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,float16,0,0.015237333873907724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,float16,0,0.015189333508412043
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,float16,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,float16,0,0.013023999830087027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,float16,0,0.023178666830062866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,float16,0,0.01257066677014033
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,float16,0,0.021525333325068157
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,fp8,0,0.015226667126019796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,float16,0,0.011760000139474869
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,float16,0,0.021888000269730885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,float16,0,0.01179733375708262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,float16,0,0.11131200194358826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,float16,0,0.012997332960367203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,fp8,0,0.023503998915354412
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,float16,0,0.011231999844312668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,fp8,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,float16,0,0.011215999722480774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,float16,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,fp8,0,0.011690666278203329
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,float16,0,0.011183999478816986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,float16,0,0.012991999586423239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,float16,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,float16,0,0.01128000020980835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,fp8,0,0.011168000598748526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,fp8,0,0.011322667201360067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,float16,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,float16,0,0.01098666712641716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,1,128,1,float16,float16,0,0.01516266663869222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,fp8,0,0.011120000233252844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,0,0.014959999670584997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,0,0.01915733392039935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,0,0.02942933390537898
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,0,0.025381334125995636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,1,128,1,float16,float16,0,0.012037333101034164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,fp8,0,0.04402133325735728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,0,0.013189333180586496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,0,0.014783999572197596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,0,0.019359999646743137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,float16,0,0.029717333614826202
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,1,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,0,0.010666667173306147
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,0,0.01320533330241839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,float16,0,0.058389330903689064
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,1,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,1,128,1,float16,float16,0,0.011237333218256632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,0,0.01716800034046173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,1,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,0,0.010778666784365972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,0,0.010869332899649939
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,0,0.010128000130256018
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,0,0.011120000233252844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,fp8,0,0.01922133316596349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,1,128,1,float16,fp8,0,0.011120000233252844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,0,0.009141333401203156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,fp8,0,0.015168000012636185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,float16,0,0.015210667004187902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,1,128,1,float16,fp8,0,0.010885333021481832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,0,0.008954666554927826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,0,0.009119999905427298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,float16,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,fp8,0,0.0271573339899381
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,1,128,1,float16,float16,0,0.011136000355084738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,1,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,0,0.008997333546479544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,0,0.009232000137368837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,0,0.009178666397929192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,0,0.009375999992092451
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,0,0.008943999807039896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,float16,0,0.019391999890406925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,1,128,1,float16,float16,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,1,128,1,float16,float16,0,0.011055999745925268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,0,0.009114666531483332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,0,0.009050666665037474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,0,0.010853332777818045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,0,0.009088000282645226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,float16,0,0.009989333028594652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,1,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,float16,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,0,0.009248000259200731
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,1,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,0,0.009114666531483332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,0,0.008858666444818178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,0,0.008933333059151968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,float16,0,0.009962666779756546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,1,128,1,float16,float16,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,1,128,1,float16,float16,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,0,0.00922133338948091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,0,0.009173333023985228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,0,0.009839999799927076
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,0,0.009002666920423508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,0,0.009904000287254652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,0,0.009205333267649015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,1,128,1,float16,float16,0,0.010960000256697336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,1,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,0,0.009226666763424873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,fp8,0,8.710138956705729
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,float16,0,11.691215515136719
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,fp8,0,8.60098648071289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,float16,0,11.825482686360678
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,fp8,0,9.261173248291016
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,float16,0,12.342202504475912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,float16,0,6.413120269775391
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,fp8,0,9.15455436706543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,float16,0,5.229493459065755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,fp8,0,4.615807851155599
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,float16,0,12.260234832763672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,float16,0,5.649893442789714
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,fp8,0,4.445226669311523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,fp8,0,4.551146825154622
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,float16,0,3.172335942586263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,float16,0,5.789487838745117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,fp8,0,2.182912031809489
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,fp8,0,2.776458740234375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,fp8,0,5.348453521728516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,float16,0,2.770064036051432
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,fp8,0,2.466597398122152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,fp8,0,2.391413370768229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,float16,0,2.8485546112060547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,float16,0,2.8514931996663413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,float16,0,1.5810880661010742
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,float16,0,1.401312033335368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,fp8,0,1.405029296875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,float16,0,2.680720011393229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,fp8,0,1.224671999613444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,float16,0,1.497872034708659
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,fp8,0,1.284991979598999
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,float16,0,1.4674240748087566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,fp8,0,1.2587947050730388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,fp8,0,4.719621340433757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,fp8,0,1.1887733141581218
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,float16,0,1.4114880561828613
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,float16,0,6.543413162231445
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,float16,0,6.739914576212565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,fp8,0,4.98197873433431
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,float16,0,5.822293599446614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,float16,0,6.956917444864909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,fp8,0,5.474063873291016
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,fp8,0,4.8601118723551435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,float16,0,3.6913493474324546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,fp8,0,5.619882583618164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,float16,0,3.0568106969197593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,fp8,0,3.2367092768351235
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,float16,0,3.0713332494099936
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,fp8,0,2.457263946533203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,float16,0,3.342597325642904
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,fp8,0,3.2438348134358725
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,float16,0,3.4437761306762695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,float16,0,6.778005599975586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,float16,0,1.8643412590026855
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,fp8,0,1.4092159271240234
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,fp8,0,2.480997403462728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,float16,0,1.5912639300028484
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,fp8,0,1.3286879857381184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,float16,0,1.749392032623291
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,fp8,0,1.4575360616048176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,fp8,0,1.4493707021077473
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,float16,0,0.9916906356811523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,float16,0,0.8327893416086832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,float16,0,1.5637706120808919
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,fp8,0,0.696565310160319
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,float16,0,0.8493333657582601
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,fp8,0,0.7158346970876058
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,float16,0,0.9138879776000977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,fp8,0,0.7899040381113688
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,fp8,0,2.6266132990519204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,float16,0,0.8941333293914795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,fp8,0,0.7824373245239258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,float16,0,1.7072426478068035
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,fp8,0,0.8852213223775228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,float16,0,4.373658816019694
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,fp8,0,3.4716533025105796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,fp8,0,3.5793066024780273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,float16,0,4.389749209086101
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,fp8,0,3.8934666315714517
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,float16,0,2.72544002532959
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,fp8,0,2.406912008921305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,fp8,0,2.9985920588175454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,float16,0,2.159023920694987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,fp8,0,1.7844479878743489
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,float16,0,4.846351941426595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,fp8,0,1.9545440673828125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,float16,0,4.860581398010254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,fp8,0,4.006368001302083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,fp8,0,2.020143985748291
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,fp8,0,1.7368480364481609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,fp8,0,2.059919993082682
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,float16,0,2.416554609934489
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,float16,0,1.1163679758707683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,fp8,0,1.2481226921081543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,float16,0,1.1434133052825928
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,fp8,0,0.9627306461334229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,float16,0,1.2330613136291504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,fp8,0,1.0863893032073975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,fp8,0,1.0670719941457112
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,float16,0,0.7220160166422526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,fp8,0,0.6694773038228353
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,fp8,0,0.512880007425944
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,fp8,0,0.5247253179550171
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,float16,0,1.3690826098124187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,fp8,0,0.93559463818868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,float16,0,2.1893439292907715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,float16,0,0.6534560124079386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,float16,0,0.65502401192983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,fp8,0,0.5858560005823771
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,float16,0,1.2207787036895752
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,float16,0,2.3873866399129233
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,float16,0,0.6187839905420939
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,float16,0,5.838682810465495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,fp8,0,4.554746627807617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,fp8,0,4.710581461588542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,float16,0,5.682687759399414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,fp8,0,0.5782773494720459
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,float16,0,6.543359756469727
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,float16,0,0.6027359962463379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,float16,0,2.858170509338379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,float16,0,3.5560693740844727
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,fp8,0,5.322160085042317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,fp8,0,2.4865493774414062
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,float16,0,6.444976170857747
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,fp8,0,2.605658690134684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,fp8,0,2.688666661580404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,fp8,0,5.497978846232097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,fp8,0,2.891178766886393
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,fp8,0,3.460975964864095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,fp8,0,1.6928586959838867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,fp8,0,1.2087999979654949
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,float16,0,2.9332958857218423
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,fp8,0,1.2407786846160889
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,fp8,0,1.4156053860982258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,float16,0,3.207296053568522
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,float16,0,1.8331146240234375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,fp8,0,1.3900426228841145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,float16,0,0.9328906536102295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,float16,0,0.7590186595916748
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,float16,0,1.476960023244222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,fp8,0,0.8833813667297363
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,fp8,0,0.6333280007044474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,fp8,0,0.6579840183258057
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,float16,0,1.462997277577718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,float16,0,0.8529333273569742
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,fp8,0,0.7380053202311198
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,float16,0,3.16105588277181
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,float16,0,0.5067306756973267
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,fp8,0,0.4795680046081543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,float16,0,0.4031306505203247
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,fp8,0,0.3552693525950114
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,float16,0,0.41462401549021405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,float16,0,0.7736533482869467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,fp8,0,0.3981706698735555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,float16,0,0.8523413340250651
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,float16,0,0.453328013420105
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,fp8,0,0.41650664806365967
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,float16,0,1.619658629099528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,float16,0,3.3204641342163086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,fp8,0,2.709514617919922
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,fp8,0,0.3624746799468994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,fp8,0,2.8437493642171225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,float16,0,1.6179787317911785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,fp8,0,3.2000853220621743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,float16,0,3.944746653238932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,float16,0,3.7578614552815757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,fp8,0,3.293183962504069
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,float16,0,1.6780319213867188
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,float16,0,3.3356479008992515
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,fp8,0,2.1309653917948403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,float16,0,1.7079839706420898
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,fp8,0,1.4759573936462402
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,fp8,0,0.7602453231811523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,float16,0,1.914778709411621
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,fp8,0,1.6859679222106934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,float16,0,1.9248587290445964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,float16,0,1.127616008122762
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,fp8,0,1.1110719839731853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,fp8,0,0.7250026861826578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,fp8,0,1.498480002085368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,fp8,0,0.7571146488189697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,float16,0,0.9909866650899252
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,fp8,0,1.7373919486999512
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,fp8,0,0.8919093608856201
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,float16,0,0.9876320362091064
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,fp8,0,0.912938674290975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,float16,0,0.5886933406194051
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,float16,0,0.8601760069529215
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,float16,0,0.454586664835612
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,fp8,0,0.5787680149078369
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,float16,0,0.46648534138997394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,fp8,0,0.4145013491312663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,float16,0,0.5167306661605835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,float16,0,0.4585333267847697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,float16,0,0.5172053178151449
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,fp8,0,0.4777760108311971
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,float16,0,0.32525332768758136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,fp8,0,0.32222400108973187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,fp8,0,0.22722133000691733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,float16,0,0.2557813326517741
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,fp8,0,0.23522667090098062
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,float16,0,2.2180372873942056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,float16,0,0.29180800914764404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,fp8,0,0.26173333326975506
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,fp8,0,0.4839093287785848
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,float16,0,0.25140267610549927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,float16,0,3.1947946548461914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,float16,0,0.2802079916000366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,float16,0,3.295893351236979
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,fp8,0,0.2571679949760437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,fp8,0,2.7961867650349936
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,float16,0,0.8676586945851644
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,float16,0,4.030234654744466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,float16,0,3.8257226943969727
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,float16,0,2.3512694040934243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,fp8,0,2.325226624806722
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,float16,0,1.6134079297383626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,fp8,0,1.4169440269470215
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,fp8,0,0.392410675684611
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,float16,0,2.00382407506307
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,fp8,0,1.665162722269694
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,fp8,0,3.4771200815836587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,float16,0,1.9683200518290203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,fp8,0,1.7679519653320312
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,float16,0,0.7940266927083334
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,float16,0,1.1699466705322266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,fp8,0,1.1871253649393718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,float16,0,0.8339626789093018
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,fp8,0,0.7407039801279703
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,float16,0,0.9701759815216064
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,fp8,0,0.9016106923421224
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,float16,0,0.9580000241597494
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,fp8,0,3.5870132446289062
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,fp8,0,0.9434346357981364
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,float16,0,0.6060320138931274
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,fp8,0,0.3733493487040202
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,fp8,0,0.6159840027491251
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,float16,0,0.43354666233062744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,fp8,0,0.39212799072265625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,float16,0,1.6637600262959797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,fp8,0,0.4941920042037964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,float16,0,0.5019306739171346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,fp8,0,0.4696640173594157
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,float16,0,0.3235359986623128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,fp8,0,0.2071946660677592
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,float16,0,0.23968533674875894
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,fp8,0,2.631061395009359
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,fp8,0,0.21633599201838175
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,fp8,0,0.2456266681353251
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,float16,0,0.2731253306070964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,fp8,0,0.33399999141693115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,fp8,0,0.26047466198603314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,float16,0,0.4154293139775594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,float16,0,0.18639467159907022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,fp8,0,1.4773759841918945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,fp8,0,0.12594133615493774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,fp8,0,0.6906879742940267
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,float16,0,0.1399893363316854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,fp8,0,0.13170133034388223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,float16,0,0.2768746614456177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,fp8,0,0.14516799648602804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,float16,0,0.15267200271288553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,float16,0,0.22812267144521078
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,fp8,0,1.6197120348612468
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,fp8,0,0.18610666195551553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,float16,0,2.0112160046895347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,float16,0,0.5041120052337646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,float16,0,0.15160000324249268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,fp8,0,1.7388319969177246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,fp8,0,0.14692800243695578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,fp8,0,2.2836853663126626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,float16,0,2.3547892570495605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,float16,0,0.9630133310953776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,fp8,0,2.209418614705404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,float16,0,1.8918612798055012
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,fp8,0,0.8325653076171875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,fp8,0,0.8939519723256429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,float16,0,1.2414613564809163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,fp8,0,1.083621342976888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,float16,0,1.231386661529541
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,fp8,0,1.1890559991200764
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,float16,0,1.5124640464782715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,float16,0,0.7765653133392334
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,float16,0,0.135535995165507
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,float16,0,0.4841759999593099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,float16,0,1.009552001953125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,fp8,0,0.43623467286427814
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,fp8,0,0.46618131796518963
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,float16,0,0.625770648320516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,fp8,0,0.5860693454742432
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,float16,0,0.6370773315429688
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,fp8,0,0.585045337677002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,float16,0,0.401034673055013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,fp8,0,0.42072534561157227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,fp8,0,1.566912015279134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,float16,0,0.27643734216690063
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,float16,0,0.5118666489919027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,float16,0,0.33321599165598553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,float16,0,2.4371840159098306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,fp8,0,0.32366933425267536
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,float16,0,0.2198293407758077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,float16,0,0.26236265897750854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,float16,0,0.14920533696810404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,fp8,0,0.2392586668332418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,fp8,0,0.803178628285726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,float16,0,0.17570134003957114
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,fp8,0,0.16633599996566772
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,float16,0,0.18802666664123535
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,fp8,0,0.17611199617385864
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,fp8,0,0.13501333196957907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,fp8,0,0.2587573329607646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,float16,0,0.12868799765904745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,float16,0,0.3303999900817871
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,fp8,0,0.12285332878430684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,fp8,0,0.08739200234413147
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,fp8,0,0.08931199709574382
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,fp8,0,0.10099200407663982
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,float16,0,0.10500267148017883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,fp8,0,0.23007466395696005
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,fp8,0,0.10265066226323445
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,fp8,0,0.32016533613204956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,float16,0,1.858415921529134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,fp8,0,1.648421287536621
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,float16,0,0.10499733686447144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,float16,0,1.985354741414388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,fp8,0,1.859338601430257
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,float16,0,2.7031307220458984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,fp8,0,2.6047040621439614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,float16,0,0.15362667044003805
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,float16,0,0.09687466422716777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,fp8,0,2.4206825892130532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,float16,0,0.9312853018442789
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,fp8,0,0.8399733702341715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,fp8,0,1.8238827387491863
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,float16,0,1.0034026304880779
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,float16,0,1.326527992884318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,fp8,0,1.2201279799143474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,fp8,0,0.1441920002301534
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,float16,0,1.3551252683003743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,float16,0,2.5169173876444497
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,float16,0,0.8671306769053141
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,float16,0,0.4767306645711263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,fp8,0,0.9245279630025228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,fp8,0,0.4442773262659709
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,float16,0,0.5275786717732748
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,fp8,0,0.4860373338063558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,fp8,0,0.6777973175048828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,fp8,0,0.6409173409144083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,float16,0,0.4413119951883952
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,fp8,0,1.2380159695943196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,float16,0,1.7055145899454753
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,fp8,0,0.23628799120585123
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,fp8,0,0.2541866699854533
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,float16,0,0.6689279874165853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,float16,0,0.3557973305384318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,float16,0,0.6730453173319498
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,float16,0,0.09455999732017517
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,fp8,0,0.3372053305308024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,float16,0,0.2571093241373698
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,float16,0,0.23653332392374674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,float16,0,0.14406399925549826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,fp8,0,0.25667200485865277
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,fp8,0,0.1320373316605886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,float16,0,0.1529759963353475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,fp8,0,0.924015998840332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,fp8,0,0.13923733433087668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,float16,0,0.18933866421381632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,fp8,0,0.3595786492029826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,fp8,0,0.18375466267267862
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,float16,0,0.13578133781750998
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,float16,0,0.3386880159378052
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,fp8,0,0.07926933467388153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,float16,0,0.08891200025876363
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,fp8,0,0.08401067058245341
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,fp8,0,0.09988799691200256
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,float16,0,0.27349332968393963
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,float16,0,0.10008533795674641
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,fp8,0,0.17674134174982706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,fp8,0,0.10076799988746643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,float16,0,0.18835200866063437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,fp8,0,0.07580799857775371
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,fp8,0,0.05301333467165629
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,fp8,0,0.48048532009124756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,float16,0,0.058335999647776283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,float16,0,0.09939199686050415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,float16,0,0.06423999865849812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,float16,0,0.06436799963315327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,fp8,0,0.06390933195749919
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,float16,0,0.07367466886838277
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,float16,0,1.1470666726430256
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,fp8,0,0.055957332253456116
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,fp8,0,1.0648053487141926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,fp8,0,0.06412266691525777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,float16,0,1.2535306612650554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,fp8,0,1.1825013160705566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,float16,0,1.760767936706543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,fp8,0,1.770309289296468
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,float16,0,1.664746602376302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,float16,0,0.05804799993832906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,float16,0,0.5842026472091675
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,fp8,0,1.2776959737141926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,fp8,0,0.5432159900665283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,fp8,0,0.6071306864420573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,float16,0,0.8527359962463379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,fp8,0,0.7935840288798014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,float16,0,0.8482933044433594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,float16,0,0.08502933382987976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,fp8,0,1.6305440266927083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,fp8,0,0.8990026315053304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,float16,0,0.5849546591440836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,float16,0,0.3110346595446269
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,fp8,0,0.6508693297704061
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,float16,0,1.162922700246175
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,float16,0,0.45070401827494305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,float16,0,0.43622398376464844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,fp8,0,0.4607306718826294
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,fp8,0,0.34038400650024414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,fp8,0,0.138672004143397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,fp8,0,0.15780799587567648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,fp8,0,0.2888053258260091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,float16,0,0.3363200028737386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,float16,0,0.17921600739161173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,fp8,0,0.32286399602890015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,fp8,0,0.44593600432078045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,fp8,0,0.23072532812754312
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,float16,0,0.24231467644373575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,float16,0,0.16900799671808878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,fp8,0,0.1828320026397705
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,float16,0,0.3062559962272644
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,float16,0,0.09508267045021057
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,fp8,0,0.09074133634567261
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,float16,0,0.10039466619491577
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,fp8,0,0.0969493289788564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,float16,0,0.12349866827329
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,fp8,0,0.12035733461380005
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,fp8,0,0.17339734236399332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,fp8,0,0.12412266929944356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,float16,0,0.24064532915751138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,fp8,0,0.09382399916648865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,fp8,0,0.056885331869125366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,float16,0,0.06251733501752217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,float16,0,0.070783997575442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,fp8,0,0.07069333394368489
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,float16,0,0.16936000188191733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,fp8,0,0.07274666428565979
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,float16,0,0.13108799854914346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,float16,0,0.6331679821014404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,fp8,0,0.05392000079154968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,float16,0,0.09914666414260864
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,float16,0,0.037615999579429626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,fp8,0,0.060266668597857155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,float16,0,0.04178133110205332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,fp8,0,0.04181333382924398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,float16,0,0.07036266724268596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,float16,0,0.04233600199222565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,fp8,0,0.04375466704368591
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,float16,0,0.0602400004863739
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,fp8,0,0.23462400833765665
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,float16,0,0.03769599894682566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,fp8,0,1.15666667620341
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,float16,0,1.3552266756693523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,fp8,0,1.3229866822560628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,fp8,0,2.0635573069254556
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,float16,0,0.04747200012207031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,float16,0,1.2256053288777669
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,fp8,0,0.03572800010442734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,fp8,0,1.9370452562967937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,float16,0,0.6214133501052856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,fp8,0,0.5897546609242758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,fp8,0,1.5644319852193196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,float16,0,0.6901813348134359
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,fp8,0,0.6736319859822592
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,float16,0,0.9924906889597574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,fp8,0,0.9642666975657145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,float16,0,0.9627947012583414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,fp8,0,1.0633386770884197
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,fp8,0,0.03655466685692469
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,float16,0,1.8864372571309407
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,float16,0,0.6966666380564371
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,fp8,0,0.30661332607269287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,fp8,0,0.7928319772084554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,float16,0,0.3637919823328654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,fp8,0,0.3500373363494873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,float16,0,0.5101120074590048
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,fp8,0,0.48545066515604657
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,float16,0,0.5029439926147461
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,fp8,0,0.5170613527297974
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,float16,0,0.17541333039601645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,float16,0,0.36135466893513996
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,float16,0,0.19237866004308066
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,fp8,0,0.18844799200693765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,float16,0,0.267247994740804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,float16,0,1.3718506495157878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,float16,0,0.25892800092697144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,fp8,0,0.2898133397102356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,float16,0,0.09904533624649048
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,float16,0,0.19097065925598145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,fp8,0,0.4098666508992513
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,float16,0,0.11053333679835002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,fp8,0,0.10030399759610494
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,float16,0,0.14205867052078247
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,fp8,0,0.13009599844614664
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,fp8,0,0.14461867014567056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,float16,0,2.0688799222310386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,float16,0,0.1113813320795695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,fp8,0,0.16590399543444315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,fp8,0,0.09148800373077393
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,fp8,0,0.11718400319417317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,fp8,0,0.056133334835370384
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,fp8,0,0.061887999375661217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,fp8,0,0.26046933730443317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,float16,0,0.1423413356145223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,float16,0,0.07579199969768524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,fp8,0,0.07755200068155925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,float16,0,0.05671999851862589
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,fp8,0,0.21635200579961142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,float16,0,0.04131733377774557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,fp8,0,0.03909866760174433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,float16,0,0.32629867394765216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,fp8,0,0.041573333243529
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,float16,0,0.04785599807898203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,fp8,0,0.0763679991165797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,fp8,0,0.04826133449872335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,float16,0,0.048021331429481506
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,float16,0,0.03565866748491923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,fp8,0,0.039919999738534294
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,float16,0,0.027621333797772724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,fp8,0,0.061941335598627724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,float16,0,0.029311999678611755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,float16,0,0.062261333068211876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,float16,0,0.07256533205509186
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,float16,0,0.03153600047032038
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,fp8,0,0.050016000866889954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,fp8,0,0.02741866558790207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,float16,0,0.058464000622431435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,fp8,0,0.02769600103298823
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,float16,0,0.9164693355560303
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,float16,0,0.04167999823888143
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,float16,0,0.031583999594052635
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,fp8,0,0.032416000962257385
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,fp8,0,0.9086026350657145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,fp8,0,1.072330633799235
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,float16,0,1.754410743713379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,fp8,0,1.5094559987386067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,float16,0,1.2133599917093914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,fp8,0,0.03140799949566523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,float16,0,1.6777866681416829
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,float16,0,1.066975990931193
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,fp8,0,1.4282560348510742
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,fp8,0,0.4633653163909912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,fp8,0,0.8416746457417806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,float16,0,0.8719680309295654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,fp8,0,0.8518773714701334
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,float16,0,0.6181439956029257
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,fp8,0,1.6579519907633464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,float16,0,0.5476160049438477
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,fp8,0,0.7245919704437256
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,float16,0,0.2473599910736084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,fp8,0,0.28996266921361286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,float16,0,0.4269599914550781
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,float16,0,0.2802720069885254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,float16,0,0.8941386540730795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,8,128,1,float16,fp8,0,0.44513599077860516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,float16,0,0.32046933968861896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,float16,0,0.470576008160909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,fp8,0,0.5455893278121948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,float16,0,0.13538133104642233
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,fp8,0,0.13458133737246195
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,float16,0,0.15454933047294617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,fp8,0,0.1569706698258718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,float16,0,0.22733867168426514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,fp8,0,0.24419732888539633
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,float16,0,0.22101867198944092
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,float16,0,0.07534400125344594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,fp8,0,0.07293866574764252
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,float16,0,0.08641599615414937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,fp8,0,0.2440053423245748
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,fp8,0,0.3726026614507039
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,fp8,0,0.0825386643409729
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,float16,0,0.12422933181126912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,fp8,0,0.44944532712300617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,float16,0,0.12807466586430868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,fp8,0,0.13480533162752786
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,float16,0,0.09293333689371745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,float16,0,0.045791998505592346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,fp8,0,0.043706665436426796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,float16,0,0.04819199939568838
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,fp8,0,0.11441066861152649
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,fp8,0,0.04958933095137278
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,float16,0,0.060080001751581825
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,fp8,0,0.06438399851322174
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,fp8,0,0.06622399886449178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,float16,0,0.06422933439413707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,float16,0,0.04535466432571411
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,float16,0,0.03148799886306127
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,fp8,0,0.0536106675863266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,float16,0,0.032074667513370514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,fp8,0,0.03352533280849457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,float16,0,0.037605332831541695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,fp8,0,0.2296853264172872
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,float16,0,0.03786666691303253
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,float16,0,0.17082667350769043
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,fp8,0,0.04170133173465729
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,fp8,0,0.03375466664632162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,float16,0,0.028410665690898895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,fp8,0,0.10513599713643391
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,fp8,0,0.02117866774400075
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,float16,0,0.022661333282788593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,fp8,0,0.03142933299144109
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,fp8,0,0.025392000873883564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,fp8,0,0.039701332648595176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,float16,0,0.025487999121348064
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,fp8,0,0.027424000203609467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,float16,0,0.02048533285657565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,fp8,0,0.02554133286078771
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,fp8,0,0.01964266722400983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,float16,0,0.02139200021823247
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,fp8,0,0.020288000504175823
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,float16,0,0.02128000060717265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,float16,0,0.02146666745344798
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,fp8,0,0.021375998854637146
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,float16,0,0.025199999411900837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,float16,0,0.3983253240585327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,float16,0,0.4591199954350789
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,fp8,0,0.023242667317390442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,float16,0,0.7961493333180746
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,fp8,0,0.39893333117167157
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,fp8,0,0.7179360389709473
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,float16,0,0.02128533273935318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,fp8,0,0.47696534792582196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,float16,0,0.7773013114929199
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,fp8,0,0.8567519982655843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,float16,0,0.2119040091832479
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,float16,0,0.023370665808518726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,fp8,0,0.25226134061813354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,float16,0,0.23971732457478842
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,float16,0,0.3836373488108317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,float16,0,0.4158133268356323
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,fp8,0,0.6865493456522623
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,fp8,0,0.40799999237060547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,fp8,0,0.3540586630503337
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,float16,0,0.298309326171875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,fp8,0,0.20662933588027954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,fp8,0,0.3845226764678955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,fp8,0,0.13402666648228964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,float16,0,0.2160586714744568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,fp8,0,0.21145067612330118
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,fp8,0,0.19767467180887857
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,float16,0,0.19657599925994873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,fp8,0,0.18741333484649658
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,float16,0,0.1585813363393148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,fp8,0,0.06379733482996623
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,float16,0,0.0757066657145818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,float16,0,0.5724159876505533
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,float16,0,0.11156266927719116
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,fp8,0,0.11275200049082439
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,float16,0,0.11609066526095073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,fp8,0,0.12526399890581766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,fp8,0,0.22287466128667197
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,float16,0,0.08669333656628926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,float16,0,0.06620266536871593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,fp8,0,0.09795199831326802
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,float16,0,0.04043733328580856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,fp8,0,0.04200000067551931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,float16,0,0.05183466772238413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,fp8,0,0.05816000203291575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,float16,0,0.052330667773882546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,fp8,0,0.05966933568318685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,float16,0,0.040207999447981514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,float16,0,0.02736533433198929
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,float16,0,0.11467732985814412
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,fp8,0,0.0272533322374026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,fp8,0,0.03756266583998998
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,float16,0,0.02735999971628189
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,fp8,0,0.029445332785447437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,fp8,0,0.07041066884994507
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,fp8,0,0.03619733452796936
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,float16,0,0.033546666304270424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,fp8,0,0.037615999579429626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,float16,0,0.025194667279720306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,float16,0,0.01720000058412552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,fp8,0,0.04993600149949392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,fp8,0,0.11452266573905945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,float16,0,0.019167999426523846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,float16,0,0.038218667109807335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,fp8,0,0.019274666905403137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,float16,0,0.021562665700912476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,fp8,0,0.023525332411130268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,float16,0,0.02162666618824005
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,float16,0,0.020074666788180668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,fp8,0,0.023445333043734234
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,float16,0,0.017301333447297413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,fp8,0,0.01728533332546552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,float16,0,0.01770666614174843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,fp8,0,0.01714133347074191
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,float16,0,0.13269866506258646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,fp8,0,0.019296000401178997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,float16,0,0.019002666076024372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,float16,0,0.03355200091997782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,float16,0,0.017301333447297413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,float16,0,0.01700266698996226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,float16,0,0.017136000096797943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,fp8,0,0.03146666785081228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,float16,0,0.01718933383623759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,float16,0,0.017077332983414333
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,fp8,0,0.01913600042462349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,fp8,0,0.02367466688156128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,fp8,0,0.019173332800467808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,float16,0,0.24051199356714884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,fp8,0,0.23924267292022705
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,float16,0,0.27139200766881305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,fp8,0,0.48582398891448975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,float16,0,0.019066666563351948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,float16,0,0.41014401117960614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,fp8,0,0.4415733416875203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,float16,0,0.1277653376261393
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,fp8,0,0.1288479963938395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,float16,0,0.31160000960032147
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,float16,0,0.1450506647427877
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,fp8,0,0.14749866724014282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,fp8,0,0.27985066175460815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,float16,0,0.41756268342336017
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,float16,0,0.2123946746190389
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,fp8,0,0.2495466669400533
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,float16,0,0.07237333556016286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,fp8,0,0.19208000103632608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,fp8,0,0.36762134234110516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,float16,0,0.0806933343410492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,fp8,0,0.07678399980068207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,float16,0,0.11923199892044067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,float16,0,0.12200533350308736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,fp8,0,0.11852799852689107
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,float16,0,0.09347200393676758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,fp8,0,0.10179199775060017
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,fp8,0,0.24913599093755087
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,float16,0,0.1628159979979197
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,float16,0,0.04390933116277059
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,fp8,0,0.0458186666170756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,fp8,0,0.06234133243560791
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,float16,0,0.056314667065938316
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,float16,0,0.05807999769846598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,fp8,0,0.06301866471767426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,fp8,0,0.1306773324807485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,fp8,0,0.04818133513132731
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,fp8,0,0.025589334468046825
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,float16,0,0.027306665976842243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,float16,0,0.04124800115823746
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,float16,0,0.031530665854612984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,fp8,0,0.06963199873765309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,float16,0,0.03352533280849457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,fp8,0,0.03581333408753077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,float16,0,0.025386666258176167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,float16,0,0.03996799886226654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,fp8,0,0.03145600110292435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,float16,0,0.01916266605257988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,fp8,0,0.019509332875410717
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,fp8,0,0.027306665976842243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,float16,0,0.021967999637126923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,fp8,0,0.02499199906984965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,fp8,0,0.02532800038655599
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,float16,0,0.017103999853134155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,float16,0,0.21857066949208578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,fp8,0,0.021301334102948506
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,fp8,0,0.01340266689658165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,float16,0,0.013002666334311167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,fp8,0,0.04168533285458883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,fp8,0,0.014064000298579534
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,float16,0,0.015184000134468079
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,fp8,0,0.015754666179418564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,float16,0,0.015098666151364645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,float16,0,0.025439999997615814
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,fp8,0,0.015978666643301647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,float16,0,0.019845332950353622
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,fp8,0,0.015226667126019796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,float16,0,0.012981332838535309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,fp8,0,0.013354666531085968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,fp8,0,0.03555200000603994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,float16,0,0.0234400009115537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,float16,0,0.013253333667914072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,float16,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,float16,0,0.013418667018413544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,fp8,0,0.021338666478792827
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,float16,0,0.0141546664138635
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,float16,0,0.012938667088747025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,float16,0,0.012890666723251343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,float16,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,float16,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,float16,0,0.18332266807556152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,fp8,0,0.18120000759760538
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,float16,0,0.19933332999547324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,fp8,0,0.2017866571744283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,float16,0,0.013280000537633896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,float16,0,0.013194666554530462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,fp8,0,0.30268265803654987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,fp8,0,0.013290667285521826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,fp8,0,0.3046026627222697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,float16,0,0.26562132438023883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,float16,0,0.10148266951243083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,fp8,0,0.09464533130327861
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,float16,0,0.14814399679501852
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,fp8,0,0.1034453312555949
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,fp8,0,0.14178133010864258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,float16,0,0.28497066100438434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,fp8,0,0.15495466192563376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,float16,0,0.10150399804115295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,fp8,0,0.1134986678759257
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,float16,0,0.05397333204746246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,float16,0,0.19021334250768027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,fp8,0,0.054048001766204834
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,float16,0,0.05807999769846598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,float16,0,0.1092693308989207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,fp8,0,0.2185866634051005
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,float16,0,0.07443200051784515
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,fp8,0,0.07463466624418895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,fp8,0,0.033488000432650246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,fp8,0,0.055770665407180786
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,float16,0,0.1487626632054647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,float16,0,0.033514666060606636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,fp8,0,0.035386666655540466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,fp8,0,0.04214933514595032
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,fp8,0,0.05820799867312113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,float16,0,0.039813332259655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,fp8,0,0.043509334325790405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,float16,0,0.06962133447329204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,fp8,0,0.0749013324578603
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,float16,0,0.027477333943049114
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,fp8,0,0.021189334491888683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,float16,0,0.021344001094500225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,float16,0,0.033471999069054924
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,float16,0,0.02532800038655599
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,fp8,0,0.025301332275072735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,fp8,0,0.02738133321205775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,float16,0,0.025253333151340485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,float16,0,0.03987200061480204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,float16,0,0.016783999900023144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,fp8,0,0.023311999936898548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,float16,0,0.017130666722853977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,fp8,0,0.017077332983414333
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,float16,0,0.017509333789348602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,fp8,0,0.033717334270477295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,float16,0,0.017738666385412216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,fp8,0,0.019413333386182785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,float16,0,0.047354668378829956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,float16,0,0.012997332960367203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,fp8,0,0.0129120002190272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,float16,0,0.019258666783571243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,float16,0,0.013023999830087027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,float16,0,0.01292266696691513
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,float16,0,0.021253332495689392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,float16,0,0.01312000056107839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,fp8,0,0.022661333282788593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,float16,0,0.011317333827416102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,fp8,0,0.012069333344697952
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,fp8,0,0.012165332833925882
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,float16,0,0.013658666362365087
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,float16,0,0.011365332951148352
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,float16,0,0.011120000233252844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,fp8,0,0.01328533391157786
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,fp8,0,0.019199999670187633
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,float16,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,fp8,0,0.014090667168299357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,float16,0,0.01138666644692421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,fp8,0,0.014005333185195923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,fp8,0,0.015247999380032221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,fp8,0,0.013429333766301474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,float16,0,0.01267733300725619
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,float16,0,0.15693333745002747
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,fp8,0,0.1492693324883779
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,float16,0,0.011205332974592844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,fp8,0,0.013178666432698568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,float16,0,0.16370667020479837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,fp8,0,0.15756266315778097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,float16,0,0.2002453406651815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,fp8,0,0.20105600357055664
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,float16,0,0.2055199940999349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,fp8,0,0.21169066429138184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,float16,0,0.013093333691358566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,fp8,0,0.14034666617711386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,float16,0,0.08334933718045552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,fp8,0,0.07962666451931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,float16,0,0.0867733359336853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,fp8,0,0.08532800277074178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,fp8,0,0.10113599896430969
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,float16,0,0.07373333474000295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,fp8,0,0.06832533578077953
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,float16,0,0.04779199759165446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,float16,0,0.011328000575304031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,fp8,0,0.04621333380540212
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,float16,0,0.04879466692606608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,float16,0,0.012362666428089142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,fp8,0,0.04814399778842926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,float16,0,0.05418666700522105
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,fp8,0,0.05571199953556061
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,float16,0,0.05426666637261709
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,float16,0,0.10643200079600017
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,float16,0,0.035743998984495796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,float16,0,0.029487999776999157
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,fp8,0,0.027984000742435455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,fp8,0,0.029578665892283123
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,float16,0,0.03108799954255422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,float16,0,0.132042666276296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,fp8,0,0.03357866654793421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,fp8,0,0.03477333237727483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,float16,0,0.022863999009132385
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,float16,0,0.019167999426523846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,fp8,0,0.025461333493391674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,fp8,0,0.01913600042462349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,fp8,0,0.041690667470296226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,float16,0,0.01933866615096728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,fp8,0,0.019109333554903667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,float16,0,0.032138665517171226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,float16,0,0.02086399992307027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,float16,0,0.033546666304270424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,float16,0,0.015530666957298914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,fp8,0,0.018239999810854595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,float16,0,0.09974400202433269
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,fp8,0,0.05608533322811127
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,float16,0,0.015135999768972397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,float16,0,0.015077333897352219
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,fp8,0,0.021738665799299877
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,fp8,0,0.021221332252025604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,float16,0,0.015130666395028433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,fp8,0,0.01258133351802826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,float16,0,0.012389333297808966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,float16,0,0.015226667126019796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,float16,0,0.013023999830087027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,float16,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,float16,0,0.021183999876181286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,fp8,0,0.015168000012636185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,float16,0,0.011253333340088526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,float16,0,0.012960000584522883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,fp8,0,0.10048000017801921
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,float16,0,0.01128000020980835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,fp8,0,0.012122667084137598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,float16,0,0.011322667201360067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,float16,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,fp8,0,0.012096000214417776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,fp8,0,0.01209066684047381
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,float16,0,0.011258666714032492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,float16,0,0.010928000013033548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,fp8,0,0.011152000476916632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,float16,0,0.01129066695769628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,float16,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,float16,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,float16,0,0.1444906691710154
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,fp8,0,0.13528000315030417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,float16,0,0.011226666470368704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,float16,0,0.011002667248249054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,fp8,0,0.13942399621009827
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,fp8,0,0.156058669090271
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,float16,0,0.17178666591644287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,fp8,0,0.16134933630625406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,float16,0,0.10482666889826457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,float16,0,0.08098133405049641
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,fp8,0,0.10109866658846538
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,fp8,0,0.07312533259391785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,float16,0,0.08208000163237254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,fp8,0,0.07469333211580913
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,float16,0,0.08538132905960083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,fp8,0,0.0819413314263026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,fp8,0,0.08301866551240285
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,float16,0,0.05403733253479004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,float16,0,0.045696000258127846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,fp8,0,0.04279999931653341
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,fp8,0,0.04394133388996124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,float16,0,0.04808000226815542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,float16,0,0.1509173313776652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,fp8,0,0.04800533254941305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,float16,0,0.0487306664387385
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,fp8,0,0.047882666190465294
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,float16,0,0.03141866624355316
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,fp8,0,0.031856000423431396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,fp8,0,0.02731200059254964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,fp8,0,0.027429332335789997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,float16,0,0.029311999678611755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,float16,0,0.045834665497144066
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,fp8,0,0.028143999477227528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,float16,0,0.02956266701221466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,fp8,0,0.029477333029111225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,float16,0,0.019317333896954853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,fp8,0,0.021055998901526134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,float16,0,0.019258666783571243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,fp8,0,0.019007999449968338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,float16,0,0.028079998989899952
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,float16,0,0.08779199918111165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,float16,0,0.019146667172511418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,float16,0,0.02962133288383484
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,fp8,0,0.01889066646496455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,float16,0,0.015279999623696009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,float16,0,0.015029333531856537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,fp8,0,0.017551999539136887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,float16,0,0.015168000012636185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,float16,0,0.1710666616757711
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,fp8,0,0.01886933296918869
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,float16,0,0.015210667004187902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,fp8,0,0.05478399991989136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,float16,0,0.015061333775520325
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,float16,0,0.011146667102972666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,float16,0,0.01129066695769628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,fp8,0,0.011695999652147293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,float16,0,0.019343999524911244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,float16,0,0.011493333925803503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,fp8,0,0.013471999516089758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,float16,0,0.011338666081428528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,float16,0,0.011114666859308878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,fp8,0,0.010863999525705973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,float16,0,0.011178666104873022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,fp8,0,0.016352000335852306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,float16,0,0.013093333691358566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,fp8,0,0.011050666371981302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,float16,0,0.01129066695769628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,fp8,0,0.0129120002190272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,float16,0,0.011450666934251785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,fp8,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,float16,0,0.01923199991385142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,fp8,0,0.013178666432698568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,float16,0,0.011247999966144562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,fp8,0,0.011349332829316458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,1,128,1,float16,float16,0,0.015008000036080679
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,1,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,0,0.0170666662355264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,float16,0,0.011034666250149408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,0,0.019258666783571243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,0,0.017210666090250015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,float16,0,0.01108266661564509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,0,0.0295413335164388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,0,0.026021334032217663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,float16,0,0.04630400240421295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,float16,0,0.010885333021481832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,1,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,0,0.013151999562978745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,0,0.013797332843144735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,0,0.019248000035683315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,fp8,0,0.03562666724125544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,float16,0,0.025434667865435284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,1,128,1,float16,float16,0,0.01118933285276095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,fp8,0,0.023311999936898548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,0,0.010879999647537867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,0,0.011258666714032492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,float16,0,0.017360000560681026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,1,128,1,float16,float16,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,1,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,0,0.009279999881982803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,0,0.010832000523805618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,0,0.00916800027092298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,0,0.009919999788204828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,0,0.01098666712641716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,0,0.010847999403874079
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,fp8,0,0.01321600005030632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,float16,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,1,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,1,128,1,float16,float16,0,0.011285333583752314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,0,0.01090666651725769
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,float16,0,0.011653333902359009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,0,0.014949332922697067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,1,128,1,float16,float16,0,0.010928000013033548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,1,128,1,float16,fp8,0,0.011231999844312668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,0,0.009061333412925402
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,0,0.009269333134094873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,0,0.008826666822036108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,1,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,float16,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,fp8,0,0.011114666859308878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,1,128,1,float16,float16,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,1,128,1,float16,float16,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,0,0.008965333302815756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,0,0.00891733355820179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,0,0.009258666386206945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,float16,0,0.010602666685978571
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,1,128,1,float16,float16,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,1,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,0,0.009290666629870733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,0,0.008933333059151968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,1,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,0,0.008965333302815756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,0,0.00926399976015091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,float16,0,0.009232000137368837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,1,128,1,float16,float16,0,0.011136000355084738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,0,0.009178666397929192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,0,0.00926399976015091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,0,0.009237333511312803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,0,0.00915733352303505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,0,0.009365333244204521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,0,0.008954666554927826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,1,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,0,0.009125333279371262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,fp8,0,5.63752555847168
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,float16,0,7.707871754964192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,fp8,0,5.767050425211589
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,float16,0,7.789365132649739
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,float16,0,8.014730453491211
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,fp8,0,6.2271467844645185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,float16,0,4.202122688293457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,fp8,0,3.4987147649129233
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,float16,0,8.283386866251627
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,fp8,0,2.8488105138142905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,float16,0,3.5633281071980796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,fp8,0,3.0666987101236978
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,float16,0,3.939674695332845
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,float16,0,3.9374240239461265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,float16,0,2.1203999519348145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,fp8,0,6.345200220743815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,float16,0,3.5828800201416016
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,float16,0,1.8253119786580403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,fp8,0,1.7576853434244792
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,float16,0,1.9150667190551758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,fp8,0,1.5303573608398438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,float16,0,1.9983146985371907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,fp8,0,3.5089759826660156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,fp8,0,1.743077278137207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,float16,0,1.0893706480662029
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,fp8,0,3.465317408243815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,float16,0,0.9788266817728678
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,fp8,0,0.9713919957478842
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,fp8,0,1.9724853833516438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,float16,0,0.9738559722900391
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,fp8,0,0.8286399841308594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,float16,0,1.0594933032989502
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,fp8,0,0.9238346417744955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,fp8,0,1.691391944885254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,fp8,0,0.909173329671224
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,float16,0,1.060421307881673
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,float16,0,2.0327733357747397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,fp8,0,3.2528053919474282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,fp8,0,0.8187253475189209
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,fp8,0,3.372997283935547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,float16,0,4.699861208597819
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,fp8,0,2.1686399777730307
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,float16,0,4.6799360911051435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,float16,0,4.023440043131511
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,fp8,0,1.6800319353739421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,float16,0,2.1528746287027993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,float16,0,4.139391899108887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,fp8,0,3.7401386896769204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,fp8,0,1.7427147229512532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,float16,0,2.363258679707845
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,fp8,0,2.042234738667806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,float16,0,2.4210666020711265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,float16,0,1.2464000384012859
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,fp8,0,1.9847040176391602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,fp8,0,1.1548373699188232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,fp8,0,0.8902880350748698
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,fp8,0,0.924394687016805
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,float16,0,1.096127986907959
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,fp8,0,1.0441280206044514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,float16,0,1.2006560166676838
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,float16,0,0.6674079895019531
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,fp8,0,0.619493325551351
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,fp8,0,3.8903414408365884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,float16,0,2.0505226453145347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,float16,0,1.0860586961110432
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,fp8,0,0.49324798583984375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,float16,0,0.6053119897842407
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,fp8,0,0.509498675664266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,float16,0,0.6518880128860474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,float16,0,1.2340479691823323
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,float16,0,0.6581600109736124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,fp8,0,0.5737333297729492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,fp8,0,1.0832586288452148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,float16,0,2.909887949625651
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,fp8,0,2.431450684865316
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,float16,0,3.028736114501953
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,float16,0,3.3058719635009766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,fp8,0,0.5812426805496216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,float16,0,3.260080019632975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,float16,0,1.7828586896260579
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,fp8,0,1.616986592610677
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,float16,0,1.4684054056803386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,fp8,0,2.322389284769694
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,float16,0,2.315397262573242
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,fp8,0,1.2106773058573406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,float16,0,1.4998772939046223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,float16,0,1.7151360511779785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,fp8,0,1.4476745923360188
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,float16,0,1.6852960586547852
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,fp8,0,1.460634708404541
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,float16,0,0.9303893248240153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,fp8,0,2.937642733256022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,float16,0,0.774282693862915
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,fp8,0,0.6511573394139608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,float16,0,0.8879573345184326
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,fp8,0,0.8008693059285482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,fp8,0,1.28875732421875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,fp8,0,0.7741706371307373
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,float16,0,0.49460268020629883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,fp8,0,0.46990398565928143
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,float16,0,0.41676799456278485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,fp8,0,0.8517706394195557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,fp8,0,0.36819199721018475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,float16,0,0.8057920138041178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,fp8,0,0.37654932339986164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,float16,0,0.5733280181884766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,fp8,0,0.41782931486765545
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,float16,0,0.8755679925282797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,float16,0,0.48049600919087726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,fp8,0,0.4383840163548787
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,fp8,0,2.912869453430176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,float16,0,3.807114601135254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,fp8,0,3.028421401977539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,float16,0,0.4289226531982422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,fp8,0,0.6739306449890137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,fp8,0,3.2004693349202475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,float16,0,4.595701217651367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,float16,0,0.4774826765060425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,float16,0,2.442058722178141
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,fp8,0,2.209184010823568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,float16,0,4.516597429911296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,float16,0,1.9186293284098308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,float16,0,2.018880049387614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,fp8,0,1.759749412536621
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,float16,0,2.2799199422200522
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,fp8,0,2.0506933530171714
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,float16,0,2.2997387250264487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,fp8,0,4.297893206278483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,fp8,0,3.9351733525594077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,float16,0,1.2422773043314617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,float16,0,0.9883733590443929
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,fp8,0,0.8104000091552734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,float16,0,1.0328586896260579
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,float16,0,3.858037312825521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,fp8,0,0.8583626747131348
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,float16,0,1.1403520107269287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,fp8,0,0.9947786331176758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,fp8,0,1.010864019393921
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,float16,0,0.6438773473103842
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,fp8,0,0.604421337445577
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,fp8,0,0.4439680178960164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,fp8,0,0.47149864832560223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,fp8,0,1.5555040041605632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,float16,0,0.6139413515726725
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,fp8,0,0.553978681564331
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,fp8,0,1.1382986704508464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,float16,0,0.6082666714986166
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,fp8,0,0.5508426825205485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,float16,0,0.3546559810638428
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,fp8,0,0.34109334150950116
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,fp8,0,0.25949867566426593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,float16,0,1.1815679868062336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,float16,0,0.5135360161463419
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,fp8,0,0.3004480004310608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,fp8,0,2.0038986206054688
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,float16,0,0.5390293200810751
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,float16,0,2.2245920499165854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,float16,0,0.2896266579627991
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,float16,0,0.29821866750717163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,float16,0,0.3307360013326009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,fp8,0,1.9297547340393066
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,float16,0,0.3420373201370239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,float16,0,2.747957229614258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,fp8,0,2.326650619506836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,fp8,0,1.8006025950113933
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,float16,0,2.713072141011556
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,fp8,0,2.396165370941162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,float16,0,1.4774079322814941
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,float16,0,1.1367146968841553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,fp8,0,0.9319307009379069
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,float16,0,1.1660959720611572
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,fp8,0,1.0041226545969646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,float16,0,1.3946773211161296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,fp8,0,1.3141333262125652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,float16,0,1.4136053721110027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,float16,0,0.768613338470459
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,fp8,0,0.740069309870402
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,fp8,0,0.4984746774037679
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,float16,0,0.6055093208948771
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,fp8,0,1.4204319318135579
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,fp8,0,0.531440019607544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,float16,0,2.32259734471639
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,float16,0,0.7173973719278971
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,fp8,0,0.6491359869639078
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,float16,0,0.7414613564809164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,fp8,0,0.6872639656066895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,float16,0,0.40642134348551434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,fp8,0,0.3996693293253581
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,fp8,0,0.27378666400909424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,fp8,0,1.2256266276041667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,float16,0,0.33196266492207843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,float16,0,0.3864213228225708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,fp8,0,0.3463679949442546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,fp8,0,0.2679893374443054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,float16,0,0.2325119972229004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,fp8,0,0.2262453238169352
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,float16,0,0.18805332978566489
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,fp8,0,0.16803733507792154
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,float16,0,0.19088000059127808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,float16,0,0.3158880074818929
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,fp8,0,0.2895039916038513
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,float16,0,0.21740265687306723
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,fp8,0,0.19842666387557983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,fp8,0,0.3109813332557678
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,fp8,0,0.17337600390116373
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,float16,0,2.063632011413574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,float16,0,0.5767093499501547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,fp8,0,1.7432907422383626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,float16,0,0.382421334584554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,float16,0,2.2007039388020835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,float16,0,2.8814239501953125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,fp8,0,2.680389404296875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,float16,0,2.752842585245768
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,float16,0,1.5380160013834636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,fp8,0,1.5431733131408691
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,float16,0,1.058181365331014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,fp8,0,0.3540053367614746
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,fp8,0,1.9973492622375488
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,fp8,0,0.8918399810791016
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,fp8,0,0.9793333212534586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,float16,0,1.4241013526916504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,fp8,0,1.357360045115153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,float16,0,1.4646666844685872
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,fp8,0,2.5108159383138022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,fp8,0,1.3433119455973308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,float16,0,0.7831573486328125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,fp8,0,0.7923466364542643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,fp8,0,0.4668480157852173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,float16,0,0.5713813304901123
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,fp8,0,0.5144160191218058
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,float16,0,1.1429333686828613
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,fp8,0,0.6483093500137329
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,fp8,0,0.6662506659825643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,float16,0,0.409824013710022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,fp8,0,0.41807464758555096
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,float16,0,0.2905866702397664
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,fp8,0,0.19686400890350342
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,fp8,0,0.2588213284810384
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,fp8,0,0.2762133280436198
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,float16,0,0.39310399691263836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,fp8,0,0.3766026496887207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,float16,0,0.381872018178304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,fp8,0,0.35577066739400226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,float16,0,0.720240036646525
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,float16,0,0.22866666316986084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,fp8,0,0.2305333415667216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,float16,0,0.17482133706410727
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,float16,0,0.20968000094095865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,float16,0,0.3062506715456645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,float16,0,0.2093600034713745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,fp8,0,0.19158399105072021
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,float16,0,0.2178773283958435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,fp8,0,0.20006932814915976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,float16,0,0.13618133465449014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,float16,0,0.10704533259073894
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,fp8,0,0.09698133667310078
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,float16,0,0.10990400115648906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,fp8,0,0.10204266508420308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,fp8,0,0.1500320037206014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,float16,0,0.12122133374214172
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,fp8,0,0.11749333143234253
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,fp8,0,0.1601706643899282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,fp8,0,0.1195146640141805
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,float16,0,1.237178643544515
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,fp8,0,1.0633066495259602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,float16,0,0.7226773103078207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,fp8,0,0.12814399600028992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,fp8,0,1.1880586942036946
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,fp8,0,1.594805399576823
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,float16,0,1.8523306846618652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,float16,0,0.1229759951432546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,fp8,0,1.642581303914388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,float16,0,1.0090773105621338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,fp8,0,1.0347999731699626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,fp8,0,0.5556159814198812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,float16,0,0.6890772978464762
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,fp8,0,0.6147679885228475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,float16,0,0.9190826416015625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,fp8,0,0.9213173389434814
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,float16,0,0.9355093638102213
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,float16,0,0.165093332529068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,float16,0,0.5168373187383016
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,float16,0,0.33380266030629474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,float16,0,0.5364586512247721
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,fp8,0,0.2980479995409648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,float16,0,0.3585280179977417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,fp8,0,0.3285706639289856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,float16,0,0.6341866652170817
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,float16,0,1.3902346293131511
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,fp8,0,0.44153066476186115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,float16,0,0.4548373222351074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,fp8,0,0.47386133670806885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,float16,0,0.27568533023198444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,float16,0,0.18425067265828451
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,fp8,0,0.16315733393033346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,float16,0,0.19896533091862997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,fp8,0,0.8547786871592203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,float16,0,0.24964799483617148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,fp8,0,0.24645866950352988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,fp8,0,0.5359520117441813
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,fp8,0,0.24441067377726236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,float16,0,0.15668800473213196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,float16,0,0.10905067125956218
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,fp8,0,0.15851733088493347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,float16,0,0.11436266700426738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,fp8,0,0.10726933677991231
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,float16,0,0.13132799665133157
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,fp8,0,0.13129599889119467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,float16,0,0.14138133327166238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,fp8,0,0.13242133458455405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,fp8,0,0.17504000663757324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,float16,0,0.08647466699282329
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,float16,0,0.07206933200359344
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,float16,0,0.25005332628885907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,float16,0,0.07469333211580913
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,fp8,0,0.07065600156784058
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,fp8,0,0.1011199951171875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,fp8,0,0.08077333370844524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,float16,0,0.4798239866892497
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,fp8,0,0.08261333405971527
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,float16,0,0.08323733508586884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,fp8,0,0.28667734066645306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,float16,0,1.257034699122111
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,fp8,0,0.08833600083986919
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,fp8,0,1.0781226952870686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,fp8,0,0.0666720022757848
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,fp8,0,1.2478880087534587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,float16,0,2.0752693812052407
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,fp8,0,2.0264639854431152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,float16,0,1.9164692560831706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,fp8,0,1.8398879369099934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,float16,0,1.1253386338551838
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,float16,0,0.628661314646403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,float16,0,1.754960060119629
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,fp8,0,0.5532159805297852
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,float16,0,1.3744640350341797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,float16,0,0.0825439989566803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,float16,0,1.0202133655548096
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,fp8,0,0.637994647026062
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,float16,0,1.0398879845937092
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,fp8,0,1.0312533378601074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,float16,0,0.5671573479970297
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,fp8,0,0.29452266295750934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,fp8,0,1.199621359507243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,fp8,0,0.34136533737182617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,float16,0,0.513429323832194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,fp8,0,0.48555199305216473
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,fp8,0,0.5016213258107504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,fp8,0,0.9268426895141602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,float16,0,0.6975733439127604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,fp8,0,0.3211626609166463
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,float16,0,0.17659199237823486
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,float16,0,0.32474666833877563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,fp8,0,0.6122666597366333
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,fp8,0,0.1816906730333964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,float16,0,0.2778826753298442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,fp8,0,0.27109332879384357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,fp8,0,0.27240532636642456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,float16,0,0.16383999586105347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,float16,0,0.5245386759440104
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,float16,0,0.10243200262387593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,fp8,0,0.0942026674747467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,float16,0,0.10963199536005656
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,fp8,0,0.10385599732398987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,fp8,0,0.16225066781044006
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,float16,0,0.14517866571744284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,fp8,0,0.13390933473904928
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,float16,0,0.362885316212972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,fp8,0,0.14537066221237183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,float16,0,0.2611306707064311
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,float16,0,0.0932373305161794
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,fp8,0,0.08984532952308655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,fp8,0,0.059749335050582886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,float16,0,0.06752533217271169
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,fp8,0,0.17492800951004028
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,float16,0,0.07943999767303467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,fp8,0,0.08057066798210144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,float16,0,0.08051733175913493
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,float16,0,0.04987200101216634
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,fp8,0,0.054117331902186074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,float16,0,0.042090664307276406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,fp8,0,0.03864533454179764
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,float16,0,0.1463466684023539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,float16,0,0.04353600243727366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,fp8,0,0.03999999910593033
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,float16,0,0.04804266492525736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,fp8,0,0.04855466882387797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,float16,0,0.04819199939568838
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,fp8,0,0.06435200075308482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,fp8,0,0.08152000109354655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,float16,0,0.7705066998799642
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,fp8,0,0.6930720011393229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,float16,0,0.8782880306243896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,float16,0,0.1949066718419393
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,fp8,0,0.8193919658660889
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,float16,0,1.333456039428711
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,fp8,0,1.2130080064137776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,float16,0,0.06426133215427399
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,float16,0,0.39688531557718915
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,fp8,0,1.2761066754659016
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,fp8,0,0.8394080003102621
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,float16,0,0.7640373706817627
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,float16,0,0.444976011912028
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,fp8,0,0.4226826826731364
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,float16,0,0.6682399908701578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,fp8,0,0.7251359621683756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,float16,0,0.6529119809468588
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,fp8,0,0.715930700302124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,float16,0,0.38929065068562824
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,fp8,0,0.4315626621246338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,fp8,0,0.1978506644566854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,fp8,0,0.2297333280245463
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,float16,0,0.350053350130717
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,fp8,0,0.3626399834950765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,fp8,0,0.36682132879892987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,float16,0,0.2982826630274455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,float16,0,0.33586664994557697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,fp8,0,0.3787999947865804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,float16,0,0.20826133092244467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,float16,0,0.11899200081825256
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,fp8,0,0.10750933488210042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,fp8,0,0.12125866611798604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,float16,0,0.21338667472203574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,float16,0,0.24274667104085287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,fp8,0,0.18378132581710815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,fp8,0,0.04839999973773956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,fp8,0,0.18702399730682373
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,float16,0,0.11583999792734782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,fp8,0,0.12356799840927124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,float16,0,0.06977599859237671
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,fp8,0,0.06596800188223521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,fp8,0,0.22914133469263712
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,fp8,0,0.07282133400440216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,float16,0,0.13702399532000223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,fp8,0,0.09532800316810608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,float16,0,0.1037600040435791
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,float16,0,1.2953333059946697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,fp8,0,0.06479466458161671
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,float16,0,0.04613333443800608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,fp8,0,0.044112001856168113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,float16,0,0.048207998275756836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,float16,0,0.05630933245023092
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,float16,0,0.0745066652695338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,fp8,0,0.057946667075157166
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,float16,0,0.0932373305161794
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,float16,0,0.05829333265622457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,fp8,0,0.09868799646695454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,float16,0,0.03962666789690653
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,fp8,0,0.04223999877770742
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,float16,0,0.03357866654793421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,float16,0,0.03425066669782003
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,fp8,0,0.03376533339420954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,float16,0,0.03907199949026108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,fp8,0,0.047983999053637184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,float16,0,0.03982399900754293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,fp8,0,0.03977599988381068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,fp8,0,0.060218666990598045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,float16,0,0.8244746526082357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,fp8,0,0.7502453327178955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,float16,0,0.18290666739145914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,fp8,0,0.031471999982992806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,float16,0,0.9431946277618408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,fp8,0,0.03974399964014689
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,fp8,0,0.9152853488922119
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,float16,0,1.5857973098754883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,float16,0,1.6400853792826335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,fp8,0,1.6608853340148926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,fp8,0,1.0241759618123372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,fp8,0,1.5115466117858887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,float16,0,0.42348265647888184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,float16,0,0.060122668743133545
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,fp8,0,0.4723946650822957
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,fp8,0,0.3874186674753825
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,float16,0,0.8146826426188151
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,float16,0,0.832266648610433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,fp8,0,0.8324159781138102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,float16,0,0.218831996122996
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,float16,0,0.45953599611918133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,fp8,0,0.5224959850311279
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,float16,0,0.19156799713770548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,fp8,0,0.21035200357437134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,fp8,0,0.25221866369247437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,fp8,0,0.3829653263092041
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,float16,0,0.42764798800150555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,fp8,0,0.4100586573282878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,fp8,0,0.7942399978637695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,float16,0,0.12123733758926392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,float16,0,0.1372266709804535
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,float16,0,0.907258669535319
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,fp8,0,0.13346667091051737
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,float16,0,0.2585066755612691
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,float16,0,0.22091732422510782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,float16,0,0.3961919943491618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,fp8,0,0.2070293426513672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,float16,0,0.21338667472203574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,fp8,0,0.21129600207010904
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,float16,0,0.49092264970143634
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,float16,0,0.06841599941253662
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,float16,0,0.2406346599260966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,float16,0,0.07550933460394542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,fp8,0,0.26982933282852173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,fp8,0,0.10549867153167725
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,float16,0,0.11466667056083679
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,float16,0,0.11440533399581909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,fp8,0,0.11904000242551167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,float16,0,0.07252266506354015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,float16,0,0.04372799893220266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,fp8,0,0.04171200096607208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,fp8,0,0.04610666632652283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,fp8,0,0.14476799964904785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,fp8,0,0.07481066882610321
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,fp8,0,0.062362665931383766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,fp8,0,0.11499733726183574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,float16,0,0.03613866617282232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,fp8,0,0.07266666491826375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,float16,0,0.027477333943049114
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,float16,0,0.04749333361784617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,fp8,0,0.027562665442625683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,fp8,0,0.02957333376010259
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,float16,0,0.05813866853713989
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,float16,0,0.035061334570248924
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,fp8,0,0.03675200045108795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,fp8,0,0.06418666740258534
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,float16,0,0.03584533433119456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,fp8,0,0.03788266579310099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,float16,0,0.02958933264017105
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,fp8,0,0.03150933235883713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,fp8,0,0.0415786678592364
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,float16,0,0.02533866713444392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,fp8,0,0.023423999547958374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,float16,0,0.027301333844661713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,fp8,0,0.06495466828346252
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,fp8,0,0.029509333272775013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,float16,0,0.029440000653266907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,float16,0,0.06020266811052958
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,float16,0,0.6068693399429321
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,fp8,0,0.5782399972279867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,float16,0,0.029317334294319153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,float16,0,0.7422186533610026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,float16,0,0.13009066383043924
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,fp8,0,0.03010133405526479
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,fp8,0,0.7407519817352295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,float16,0,1.438752015431722
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,float16,0,0.8010613123575846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,fp8,0,1.2620960076649983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,fp8,0,0.932090679804484
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,float16,0,0.029578665892283123
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,fp8,0,0.30390934149424237
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,fp8,0,1.330458641052246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,float16,0,0.3183679978052775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,float16,0,0.3794773419698079
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,float16,0,0.7157173156738281
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,float16,0,0.6460906664530436
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,fp8,0,0.7673066457112631
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,fp8,0,0.025610665480295818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,float16,0,1.2860639890034993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,fp8,0,0.4758506615956624
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,fp8,0,0.16184000174204508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,fp8,0,0.38656532764434814
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,float16,0,0.34778666496276855
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,fp8,0,0.3356959819793701
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,fp8,0,0.7910559972127279
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,float16,0,0.3314773241678874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,float16,0,0.40402666727701825
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,fp8,0,0.3630400101343791
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,float16,0,0.21132800976435342
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,fp8,0,0.246778666973114
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,fp8,0,0.08707200487454732
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,float16,0,0.09550933043162028
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,fp8,0,0.10896000266075134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,float16,0,0.19266666968663534
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,fp8,0,0.18973867098490396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,fp8,0,0.20112532377243042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,float16,0,0.11380267143249512
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,fp8,0,0.13010666767756143
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,fp8,0,0.20765332380930582
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,fp8,0,0.049839998284975685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,float16,0,0.09565333525339763
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,float16,0,0.11150933305422465
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,fp8,0,0.09273067116737366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,float16,0,0.1997013290723165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,float16,0,0.1676373283068339
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,fp8,0,0.10225066542625427
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,float16,0,0.18810133139292398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,float16,0,0.09782399733861287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,float16,0,0.03345600018898646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,fp8,0,0.06355200211207072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,float16,0,0.037685332198937736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,float16,0,0.047797332207361855
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,fp8,0,0.05249600112438202
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,float16,0,0.0580266664425532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,float16,0,0.04879466692606608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,fp8,0,0.054373333851496376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,float16,0,0.029743999242782593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,float16,0,0.021514666577180225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,fp8,0,0.021162666380405426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,float16,0,0.02342933416366577
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,fp8,0,0.023200000325838726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,float16,0,0.02794133375088374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,fp8,0,0.031445334355036415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,float16,0,0.06232533355553945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,fp8,0,0.031471999982992806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,fp8,0,0.03761066744724909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,float16,0,0.02332266668478648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,float16,0,0.019050666441520054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,fp8,0,0.025605333348115284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,float16,0,0.020053333292404812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,fp8,0,0.035530666510264076
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,fp8,0,0.02348266790310542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,float16,0,0.023333333432674408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,fp8,0,0.023578666150569916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,float16,0,0.029504001140594482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,fp8,0,0.031498665610949196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,fp8,0,0.060191998879114784
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,float16,0,0.019205333044131596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,fp8,0,0.01932799940307935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,fp8,0,0.0176959993938605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,float16,0,0.023152001202106476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,float16,0,0.019199999670187633
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,float16,0,0.019237333287795384
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,fp8,0,0.021269333859284718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,float16,0,0.05202133456865946
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,float16,0,0.01915733392039935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,fp8,0,0.01740266631046931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,float16,0,0.01721599946419398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,float16,0,0.2565760016441345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,fp8,0,0.019285333653291065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,fp8,0,0.253440002600352
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,float16,0,0.32476266225179035
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,fp8,0,0.6016906499862671
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,fp8,0,0.3345760107040405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,fp8,0,0.02145066608985265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,float16,0,0.3757973512013753
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,float16,0,0.6524693171183268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,fp8,0,0.4477173487345378
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,float16,0,0.5864266554514567
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,fp8,0,0.13489066561063132
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,float16,0,0.17081600427627563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,fp8,0,0.6330399910608927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,fp8,0,0.17454399665196738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,float16,0,0.3147626717885335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,fp8,0,0.33725865681966144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,fp8,0,0.33260265986124676
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,float16,0,0.07649600009123485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,float16,0,0.328277329603831
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,fp8,0,0.0727040022611618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,float16,0,0.09316266576449077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,fp8,0,0.2302239934603373
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,float16,0,0.17653866608937582
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,float16,0,0.13777066270510355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,float16,0,0.16725333531697592
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,fp8,0,0.12358400225639343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,float16,0,0.10623466968536377
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,float16,0,0.04996799925963084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,fp8,0,0.05235733091831207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,float16,0,0.19477333625157675
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,float16,0,0.08230933547019958
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,fp8,0,0.08370133241017659
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,float16,0,0.09590400258700053
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,float16,0,0.054005334774653115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,fp8,0,0.09358933568000793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,fp8,0,0.0561706672112147
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,fp8,0,0.19125332434972128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,fp8,0,0.02603733291228612
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,fp8,0,0.181658665339152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,float16,0,0.04177066683769226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,fp8,0,0.04791999856630961
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,float16,0,0.04171733558177948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,fp8,0,0.04851733148097992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,float16,0,0.02672533442576726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,fp8,0,0.09425066908200581
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,float16,0,0.017317333569129307
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,float16,0,0.01916266605257988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,fp8,0,0.01966399947802226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,fp8,0,0.03163733333349228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,fp8,0,0.027514666318893433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,float16,0,0.02548266698916753
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,fp8,0,0.028832000990708668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,fp8,0,0.03357866654793421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,float16,0,0.019253333409627277
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,float16,0,0.026672000686327618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,fp8,0,0.02330133318901062
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,float16,0,0.01515199989080429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,float16,0,0.0295413335164388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,fp8,0,0.04377600053946177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,float16,0,0.01918399954835574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,fp8,0,0.021253332495689392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,fp8,0,0.02124800036350886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,float16,0,0.01709866647919019
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,fp8,0,0.01757866640885671
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,fp8,0,0.019146667172511418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,float16,0,0.015173333386580149
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,fp8,0,0.015109332899252573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,float16,0,0.01509333277742068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,float16,0,0.043791999419530235
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,float16,0,0.017162666966517765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,fp8,0,0.014954666296641031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,float16,0,0.02533866713444392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,float16,0,0.01700266698996226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,float16,0,0.019381333142518997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,float16,0,0.015029333531856537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,fp8,0,0.017194667210181553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,float16,0,0.015125333021084467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,fp8,0,0.01498666654030482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,fp8,0,0.015216000378131866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,float16,0,0.01725333308180173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,fp8,0,0.017360000560681026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,float16,0,0.15989866852760315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,float16,0,0.014954666296641031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,fp8,0,0.15870933731396994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,float16,0,0.015413332730531693
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,float16,0,0.19190933307011923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,float16,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,fp8,0,0.20237332582473755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,fp8,0,0.32946133613586426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,float16,0,0.204202671845754
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,float16,0,0.36826666196187335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,fp8,0,0.24147733052571616
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,fp8,0,0.08483200271924336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,float16,0,0.10409067074457805
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,fp8,0,0.10629866520563762
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,float16,0,0.1873706579208374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,float16,0,0.33560534318288165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,float16,0,0.17146666844685873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,fp8,0,0.19274665911992392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,float16,0,0.10820266604423523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,float16,0,0.08844799796740214
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,float16,0,0.045754666129748024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,float16,0,0.05349866549173991
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,fp8,0,0.056261335810025535
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,float16,0,0.08915199836095174
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,float16,0,0.09839466214179993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,fp8,0,0.19908799727757773
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,fp8,0,0.09771733482678731
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,float16,0,0.0583840012550354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,float16,0,0.02898666759332021
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,float16,0,0.03242133309443792
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,fp8,0,0.03349866718053818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,float16,0,0.04379733403523763
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,fp8,0,0.12571199735005698
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,float16,0,0.04414399961630503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,fp8,0,0.05183466772238413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,float16,0,0.02752533306678136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,float16,0,0.01916266605257988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,fp8,0,0.02956799914439519
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,fp8,0,0.019050666441520054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,float16,0,0.020687999824682873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,fp8,0,0.04580800235271454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,fp8,0,0.029311999678611755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,fp8,0,0.08852799733479817
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,float16,0,0.025626666843891144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,fp8,0,0.02937600016593933
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,float16,0,0.017445333302021027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,fp8,0,0.06025066475073496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,float16,0,0.013408000270525614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,fp8,0,0.021183999876181286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,float16,0,0.015119999647140503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,float16,0,0.01706133286158244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,fp8,0,0.01913600042462349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,fp8,0,0.03345066557327906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,float16,0,0.017338667064905167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,fp8,0,0.01913600042462349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,float16,0,0.015050667027632395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,fp8,0,0.017125333348910015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,float16,0,0.012986666212479273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,float16,0,0.015050667027632395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,fp8,0,0.015087999403476715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,float16,0,0.015077333897352219
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,fp8,0,0.015103999525308609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,float16,0,0.012986666212479273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,float16,0,0.01184533288081487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,float16,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,float16,0,0.012560000022252401
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,float16,0,0.0116799995303154
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,float16,0,0.012842666357755661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,fp8,0,0.01321600005030632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,float16,0,0.011205332974592844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,fp8,0,0.04976533353328705
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,float16,0,0.012719999998807907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,float16,0,0.012991999586423239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,float16,0,0.012863999853531519
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,fp8,0,0.36262933413187665
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,float16,0,0.13013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,float16,0,0.011674666156371435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,float16,0,0.14503467082977295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,fp8,0,0.14472533265749613
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,float16,0,0.22904000679651895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,fp8,0,0.2167146603266398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,float16,0,0.2151093284289042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,float16,0,0.12828800082206726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,float16,0,0.06896000107129414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,fp8,0,0.23676800727844238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,fp8,0,0.06621866424878438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,float16,0,0.07452799876530965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,fp8,0,0.0762613316377004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,float16,0,0.025637333591779072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,fp8,0,0.10724799831708272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,float16,0,0.11717333396275838
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,fp8,0,0.11916800340016682
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,fp8,0,0.06825066606203715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,fp8,0,0.12562666336695352
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,float16,0,0.042352000872294106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,fp8,0,0.043194666504859924
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,fp8,0,0.01257066677014033
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,float16,0,0.05348266661167145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,float16,0,0.11950400471687317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,float16,0,0.05474133292833964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,fp8,0,0.14477333426475525
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,fp8,0,0.03754133234421412
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,float16,0,0.03355200091997782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,float16,0,0.02532800038655599
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,fp8,0,0.025226667523384094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,float16,0,0.02550400048494339
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,float16,0,0.03146666785081228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,fp8,0,0.03347733368476232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,fp8,0,0.058277333776156105
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,float16,0,0.03194133440653483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,fp8,0,0.06007466713587443
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,float16,0,0.017018667111794155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,fp8,0,0.039664000272750854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,float16,0,0.017237332959969837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,fp8,0,0.01809599995613098
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,fp8,0,0.02739199995994568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,float16,0,0.019797333826621372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,fp8,0,0.021269333859284718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,fp8,0,0.022543999056021374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,float16,0,0.015072000523408255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,float16,0,0.03966933240493139
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,float16,0,0.012906666845083237
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,float16,0,0.01441066712141037
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,fp8,0,0.015002666662136713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,fp8,0,0.03565333286921183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,float16,0,0.014463999619086584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,fp8,0,0.015135999768972397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,float16,0,0.06834133466084798
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,float16,0,0.013023999830087027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,float16,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,float16,0,0.021749332547187805
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,float16,0,0.011994666109482447
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,fp8,0,0.023711999257405598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,float16,0,0.013130666067202887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,float16,0,0.011765333513418833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,float16,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,fp8,0,0.012309333930412928
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,float16,0,0.011749333391586939
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,float16,0,0.012026666353146235
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,fp8,0,0.012346666306257248
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,float16,0,0.01156266654531161
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,float16,0,0.011407999942700068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,float16,0,0.011055999745925268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,fp8,0,0.012122667084137598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,float16,0,0.011002667248249054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,fp8,0,0.01137599969903628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,float16,0,0.011482667177915573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,fp8,0,0.01173866664369901
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,float16,0,0.021381333470344543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,float16,0,0.10854933659235637
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,float16,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,fp8,0,0.10412266850471497
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,float16,0,0.11826666196187337
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,fp8,0,0.11141866445541382
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,fp8,0,0.1437386671702067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,fp8,0,0.1643946667512258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,float16,0,0.16005333264668783
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,float16,0,0.09074667096138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,fp8,0,0.09092799822489421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,fp8,0,0.056186666091283165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,fp8,0,0.011962667107582092
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,float16,0,0.06432533264160156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,fp8,0,0.06232533355553945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,float16,0,0.07459199925263722
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,fp8,0,0.07634666562080383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,float16,0,0.15455466508865356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,float16,0,0.07890666524569194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,fp8,0,0.07851733267307281
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,fp8,0,0.046207999189694725
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,float16,0,0.0354666660229365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,fp8,0,0.03350933392842611
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,float16,0,0.03753600021203359
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,float16,0,0.041749333341916404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,fp8,0,0.041749333341916404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,float16,0,0.04186666508515676
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,fp8,0,0.01098666712641716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,fp8,0,0.04394666850566864
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,fp8,0,0.029301332930723827
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,float16,0,0.023168000082174938
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,fp8,0,0.02146133283774058
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,fp8,0,0.023402666052182514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,float16,0,0.0252960001428922
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,fp8,0,0.027386667827765148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,fp8,0,0.0355679988861084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,float16,0,0.025418666501839954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,fp8,0,0.027376001079877216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,float16,0,0.027477333943049114
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,fp8,0,0.01926933353145917
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,float16,0,0.01533866673707962
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,float16,0,0.023546665906906128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,float16,0,0.015392000476519266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,float16,0,0.017338667064905167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,fp8,0,0.017450666675964992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,fp8,0,0.01754133279124896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,float16,0,0.01714666684468587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,float16,0,0.01314666618903478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,float16,0,0.017130666722853977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,float16,0,0.013162666310866674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,float16,0,0.012853333105643591
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,fp8,0,0.015146666516860327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,float16,0,0.013077333569526672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,float16,0,0.01314666618903478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,float16,0,0.01190399999419848
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,float16,0,0.012319999436537424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,float16,0,0.011354666203260422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,fp8,0,0.01231466606259346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,fp8,0,0.01333333303531011
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,float16,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,float16,0,0.011114666859308878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,fp8,0,0.01163200040658315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,float16,0,0.011173332730929056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,fp8,0,0.011781333635250727
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,float16,0,0.011215999722480774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,fp8,0,0.011349332829316458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,float16,0,0.011253333340088526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,float16,0,0.01126933346192042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,float16,0,0.010965333630641302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,float16,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,fp8,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,float16,0,0.06026133398214976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,float16,0,0.011109333485364914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,float16,0,0.011157333850860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,fp8,0,0.012042666474978128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,fp8,0,0.011343999455372492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,float16,0,0.10447999835014343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,float16,0,0.10519466797510783
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,fp8,0,0.09502399961153667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,float16,0,0.011264000087976456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,float16,0,0.12155200044314067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,fp8,0,0.11496532956759135
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,float16,0,0.1267306705315908
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,fp8,0,0.11689066886901855
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,float16,0,0.07343466579914093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,fp8,0,0.06729599833488464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,float16,0,0.05811200042565664
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,fp8,0,0.05259199937184652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,fp8,0,0.09922666351000468
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,fp8,0,0.05607999861240387
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,fp8,0,0.012378666549921036
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,fp8,0,0.062405332922935486
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,float16,0,0.06479466458161671
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,float16,0,0.01091733326514562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,fp8,0,0.06249066690603892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,float16,0,0.03765333443880081
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,fp8,0,0.03145600110292435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,float16,0,0.034832000732421875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,fp8,0,0.03217600037654241
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,float16,0,0.037690666814645134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,fp8,0,0.03584533433119456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,float16,0,0.04385066529115041
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,fp8,0,0.036618667344252266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,float16,0,0.06447466711203258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,float16,0,0.023562667270501454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,fp8,0,0.021317332983016968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,float16,0,0.021903999149799347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,fp8,0,0.021359999974568684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,float16,0,0.033514666060606636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,fp8,0,0.023381332556406658
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,float16,0,0.02319466571013133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,fp8,0,0.023445333043734234
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,float16,0,0.05978666742642721
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,float16,0,0.015135999768972397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,fp8,0,0.016143999993801117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,fp8,0,0.02492800106604894
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,float16,0,0.015333333363135656
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,float16,0,0.015135999768972397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,fp8,0,0.03799466788768768
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,float16,0,0.01522133375207583
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,fp8,0,0.01526933287580808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,float16,0,0.013104000439246496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,float16,0,0.03755733370780945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,float16,0,0.011765333513418833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,float16,0,0.012901333471139273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,float16,0,0.013125333935022354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,float16,0,0.0129120002190272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,float16,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,fp8,0,0.011584000041087469
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,float16,0,0.01118933285276095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,fp8,0,0.012346666306257248
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,fp8,0,0.011050666371981302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,float16,0,0.01505600040157636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,fp8,0,0.011274666835864386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,float16,0,0.010928000013033548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,fp8,0,0.011658667276302973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,float16,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,float16,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,float16,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,float16,0,0.01089599976936976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,fp8,0,0.012047999848922094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,fp8,0,0.013183999806642532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,fp8,0,0.011952000359694162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,float16,0,0.02317333221435547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,float16,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,float16,0,0.011328000575304031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,float16,0,0.011178666104873022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,float16,0,0.021162666380405426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,float16,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,float16,0,0.011343999455372492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,fp8,0,0.01179733375708262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,0,0.013066666821638743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,0,0.014736000448465347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,0,0.019386666516462963
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,0,0.017317333569129307
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,0,0.02941333254178365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,0,0.02552533398071925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,float16,0,0.02938666691382726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,fp8,0,0.025498665869235992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,0,0.0129120002190272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,fp8,0,0.011247999966144562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,0,0.01309866706530253
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,0,0.019343999524911244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,0,0.00966933307548364
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,0,0.009637333452701569
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,0,0.00997866690158844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,0,0.01089599976936976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,0,0.015034666905800501
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,float16,0,0.015066667149464289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,0,0.009829333052039146
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,fp8,0,0.01333333303531011
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,float16,0,0.019386666516462963
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,0,0.010645333677530289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,0,0.010010666524370512
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,float16,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,fp8,0,0.011695999652147293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,0,0.01488000030318896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,0,0.00926399976015091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,0,0.009898666913310686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,0,0.009029333169261614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,0,0.009642666826645533
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,float16,0,0.009685333197315535
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,float16,0,0.011359999577204386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,0,0.009114666531483332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,0,0.00921066664159298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,0,0.009093333035707474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,0,0.009088000282645226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,0,0.009189333145817121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,0,0.009103999783595404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,float16,0,0.009002666920423508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,0,0.009018666421373686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,fp8,0,0.009418666362762451
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,0,0.009258666386206945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,0,0.00892800030608972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,0,0.00916800027092298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,0,0.009162666896979014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,0,0.009082666908701261
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,fp8,0,0.009301333377758661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,fp8,0,0.011114666859308878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,float16,0,0.008954666554927826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,fp8,0,0.009242666885256767
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,0,0.009205333267649015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,0,0.009301333377758661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,0,0.00922133338948091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,0,0.009162666896979014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,0,0.009173333023985228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,float16,0,0.009130666653315226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,0,0.009002666920423508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,0,0.008826666822036108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,0,0.009002666920423508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,0,0.009322666873534521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,0,0.009232000137368837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,0,0.009039999917149544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,0,0.00955200009047985
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,0,0.00892800030608972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,float16,0,5.75161616007487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,float16,0,5.600874582926433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,0,0.009114666531483332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,fp8,0,4.4653120040893555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,float16,0,3.2235838572184243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,fp8,0,5.233237266540527
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,float16,0,6.362277348836263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,float16,0,2.7611465454101562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,fp8,0,4.29307746887207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,fp8,0,2.4466452598571777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,float16,0,3.1213547388712564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,fp8,0,2.698063850402832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,float16,0,1.6726187070210774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,float16,0,1.4120640754699707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,fp8,0,1.2746346791585286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,float16,0,1.4862027168273926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,fp8,0,1.2420213222503662
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,float16,0,1.5827040672302246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,fp8,0,2.8190294901529946
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,float16,0,2.772298812866211
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,fp8,0,1.4229226112365723
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,fp8,0,0.6481333176294962
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,float16,0,0.7875839869181315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,fp8,0,0.6738773187001547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,fp8,0,0.7714986801147461
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,fp8,0,1.4733707110087078
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,float16,0,3.156410535176595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,fp8,0,2.5390613873799643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,fp8,0,2.22107203801473
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,float16,0,3.15995184580485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,float16,0,0.8896160125732422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,fp8,0,0.8062986532847086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,fp8,0,2.8550294240315757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,float16,0,0.8467946847279867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,fp8,0,3.1229171752929688
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,float16,0,1.9311946233113606
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,fp8,0,1.3350186347961426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,fp8,0,1.3880747159322102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,fp8,0,1.6105225880940754
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,float16,0,1.869717280069987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,float16,0,0.7607359886169434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,float16,0,0.8334026336669922
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,fp8,0,0.7151839733123779
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,float16,0,0.861951986948649
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,fp8,0,1.7711466153462727
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,float16,0,1.5821013450622559
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,float16,0,0.9731679757436117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,fp8,0,0.8916213512420654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,float16,0,0.547050674756368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,fp8,0,0.5169066588083903
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,float16,0,0.4639893372853597
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,float16,0,0.4740693171819051
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,float16,0,1.002234697341919
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,fp8,0,0.4166133403778076
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,fp8,0,0.937882661819458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,float16,0,0.5299733479817709
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,fp8,0,0.46306665738423664
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,float16,0,3.706986745198568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,fp8,0,0.7466186682383219
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,float16,0,2.269183953603109
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,fp8,0,1.9360480308532715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,float16,0,1.631264050801595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,fp8,0,2.270469347635905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,fp8,0,1.333077351252238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,float16,0,1.131989320119222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,fp8,0,0.9581333001454672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,float16,0,1.1737866401672363
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,fp8,0,1.011413335800171
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,float16,0,1.3609387079874675
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,float16,0,2.2675573031107583
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,fp8,0,1.1881013711293538
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,fp8,0,0.7091146310170492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,fp8,0,0.4077920118967692
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,float16,0,0.6061386664708456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,fp8,0,0.5242133140563965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,float16,0,0.625210682551066
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,fp8,0,0.5540266831715902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,float16,0,0.7178826332092285
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,float16,0,0.4146346648534139
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,fp8,0,1.8247146606445312
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,float16,0,0.34272531668345135
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,float16,0,0.3474080165227254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,fp8,0,0.3070346713066101
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,float16,0,0.7445600032806396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,float16,0,0.40062399705251056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,float16,0,2.7084531784057617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,fp8,0,2.3809706370035806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,fp8,0,0.6663680076599121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,fp8,0,0.3989280064900716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,float16,0,3.0447092056274414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,fp8,0,0.32097599903742474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,fp8,0,2.5570507049560547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,float16,0,1.4906293551127117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,float16,0,1.9483146667480469
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,fp8,0,3.3217334747314453
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,fp8,0,1.8265066146850586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,float16,0,1.49398406346639
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,fp8,0,1.2321120103200276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,float16,0,1.5551786422729492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,float16,0,2.858703931172689
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,float16,0,1.8151893615722656
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,fp8,0,1.5888320604960124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,float16,0,0.7561279932657877
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,fp8,0,0.9950239658355713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,float16,0,1.0066133340199788
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,fp8,0,0.6598399877548218
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,fp8,0,0.35660799344380695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,fp8,0,0.6999039649963379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,float16,0,0.5252053340276083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,float16,0,0.4111573298772176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,fp8,0,0.36511464913686115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,float16,0,0.42692800362904865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,fp8,0,1.3208693663279216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,float16,0,0.5005760192871094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,fp8,0,0.4582506815592448
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,float16,0,0.29702399174372357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,float16,0,0.23956799507141113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,float16,0,3.676464080810547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,fp8,0,0.5120053291320801
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,float16,0,0.24447999397913614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,fp8,0,0.22763733069101968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,fp8,0,0.3806186517079671
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,float16,0,0.26994667450586957
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,fp8,0,0.25810666879018146
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,float16,0,1.6872960726420085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,fp8,0,0.29015467564264935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,float16,0,1.7871360778808594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,float16,0,0.9493227005004883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,fp8,0,0.21677333116531372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,fp8,0,1.6845919291178386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,float16,0,2.2939626375834146
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,fp8,0,2.1258293787638345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,float16,0,0.8678507010142008
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,fp8,0,1.1953919728597004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,fp8,0,0.7550293604532877
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,fp8,0,0.8206133047739664
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,fp8,0,1.4449706077575684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,fp8,0,1.087450663248698
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,float16,0,0.628058671951294
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,fp8,0,0.856661319732666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,fp8,0,0.6278453270594279
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,float16,0,0.46069331963857013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,fp8,0,0.4152746597925822
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,fp8,0,0.4407466650009155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,float16,0,1.2229333718617756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,float16,0,0.6068426767985026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,float16,0,0.3436479965845744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,float16,0,0.9178880055745443
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,float16,0,0.2696959972381592
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,float16,0,0.8125973542531332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,fp8,0,0.24275734027226767
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,float16,0,0.3239946762720744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,fp8,0,0.2898079951604207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,float16,0,0.4923786719640096
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,fp8,0,0.18955200910568237
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,float16,0,0.1602079967657725
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,fp8,0,0.5521813233693441
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,float16,0,0.16660799582799277
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,float16,0,0.25499733289082843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,float16,0,0.18438400824864706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,fp8,0,0.23185600837071738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,float16,0,1.1542399724324544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,fp8,0,1.4072052637736003
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,float16,0,0.20147732893625894
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,fp8,0,0.1488640010356903
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,fp8,0,0.15470932920773825
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,fp8,0,0.34202667077382404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,fp8,0,0.1792746583620707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,float16,0,1.293877363204956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,fp8,0,2.155306657155355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,fp8,0,1.3188587029774983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,float16,0,1.6677865982055664
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,float16,0,1.7546399434407551
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,float16,0,0.8394186496734619
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,float16,0,0.9009119669596354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,fp8,0,1.5750239690144856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,float16,0,1.1928319931030273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,fp8,0,1.2222346464792888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,float16,0,0.6603840192159017
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,float16,0,0.42927467823028564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,fp8,0,0.3866399923960368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,float16,0,0.47222399711608887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,fp8,0,0.43376000722249347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,float16,0,0.6132106781005859
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,float16,0,0.35091201464335126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,fp8,0,0.7294346491495768
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,fp8,0,0.8124639987945557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,float16,0,0.25940799713134766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,fp8,0,0.2358293334643046
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,float16,0,0.3402880032857259
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,float16,0,2.4705492655436196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,fp8,0,0.3088373343149821
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,fp8,0,0.1991306742032369
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,float16,0,0.19930134216944376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,float16,0,0.1486026644706726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,fp8,0,0.36207465330759686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,fp8,0,0.13923199971516928
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,float16,0,0.2373653252919515
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,fp8,0,0.17033600807189941
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,float16,0,0.10842133561770122
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,fp8,0,0.10915199915568034
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,float16,0,0.09199999769528706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,fp8,0,0.08458667000134786
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,fp8,0,0.6832053661346436
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,fp8,0,0.08949333429336548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,float16,0,0.13860799868901572
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,float16,0,0.10706667105356853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,fp8,0,0.1302720010280609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,float16,0,1.015232006708781
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,fp8,0,0.8786506652832031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,fp8,0,0.215338667233785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,fp8,0,1.0107306639353435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,float16,0,1.5858346621195476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,float16,0,0.8591093222300211
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,fp8,0,0.1050933301448822
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,fp8,0,0.9017226696014404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,float16,0,0.5149653355280558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,fp8,0,0.4620800018310547
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,float16,0,0.5572959979375204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,float16,0,0.1720906694730123
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,float16,0,0.8156479994455973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,fp8,0,0.7906239827473959
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,float16,0,0.43985601266225177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,fp8,0,1.404970645904541
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,fp8,0,0.2531786759694417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,fp8,0,0.2829599976539612
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,float16,0,0.41864001750946045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,fp8,0,0.5286773443222046
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,float16,0,1.0998400052388508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,fp8,0,0.4111520051956177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,fp8,0,0.25094399849573773
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,float16,0,0.1504906713962555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,float16,0,0.27402667204538983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,float16,0,0.16617600123087564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,fp8,0,0.15197867155075073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,float16,0,0.22728000084559122
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,fp8,0,0.20791999499003092
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,float16,0,0.13895466923713684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,fp8,0,0.13486400246620178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,float16,0,0.09630399942398071
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,fp8,0,0.09035733342170715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,float16,0,0.24100800355275473
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,fp8,0,0.09780800342559814
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,float16,0,0.11878400047620137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,fp8,0,0.12097600102424622
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,float16,0,0.2996693253517151
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,float16,0,0.07083199918270111
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,fp8,0,0.07275733351707458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,fp8,0,0.05494399865468343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,float16,0,0.06053866446018219
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,fp8,0,0.0581279993057251
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,float16,0,0.06887466708819072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,fp8,0,0.06830933193365733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,float16,0,0.10077866911888123
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,float16,0,0.9869706630706787
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,fp8,0,0.9127519925435384
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,float16,0,1.1187840302785237
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,fp8,0,1.0784320036570232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,fp8,0,0.5692960023880005
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,float16,0,1.843440055847168
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,fp8,0,1.5641546249389648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,float16,0,0.5196693340937296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,fp8,0,0.4681226809819539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,float16,0,0.09532266855239868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,fp8,0,0.5562613407770792
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,float16,0,0.8799946308135986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,float16,0,0.05832533538341522
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,fp8,0,0.8473066488901774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,float16,0,0.49489065011342365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,fp8,0,1.0564746856689453
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,float16,0,0.9645226796468099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,float16,0,0.2693546613057454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,fp8,0,0.472922682762146
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,fp8,0,0.25269333521525067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,fp8,0,0.14325333635012308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,fp8,0,0.4312906662623088
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,float16,0,0.262661337852478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,fp8,0,0.2860959966977437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,float16,0,0.1509866714477539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,fp8,0,0.13904000322024027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,float16,0,0.1662506659825643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,fp8,0,0.1591093341509501
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,float16,0,0.2505493362744649
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,fp8,0,0.25814932584762573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,float16,0,0.14636266231536865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,fp8,0,0.15424533685048422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,float16,0,0.08921066919962566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,fp8,0,0.08422399560610454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,float16,0,0.09548800190289815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,fp8,0,0.09423466523488362
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,float16,0,0.12640000383059183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,fp8,0,0.12427199880282085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,float16,0,0.07729599873224895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,float16,0,0.05879466732343038
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,float16,0,0.06233599781990051
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,fp8,0,0.5427039861679077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,fp8,0,0.060229331254959106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,float16,0,0.07417599856853485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,float16,0,0.5828693310419718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,float16,0,0.04995200037956238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,fp8,0,0.05355200171470642
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,float16,0,0.04173333446184794
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,fp8,0,0.03994666785001755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,fp8,0,0.08186666667461395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,fp8,0,0.05605333546797434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,float16,0,0.04996799925963084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,fp8,0,0.04980800052483877
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,fp8,0,0.07548800110816956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,float16,0,0.6403466860453287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,fp8,0,0.5964320103327433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,float16,0,0.44704000155131024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,float16,0,0.04308799902598063
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,fp8,0,0.04192533095677694
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,float16,0,1.2365333239237468
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,float16,0,0.6704426606496176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,float16,0,0.33102933565775555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,fp8,0,0.29427733023961383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,fp8,0,0.31302400430043537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,float16,0,0.3787306547164917
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,fp8,0,0.37881068388621014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,float16,0,0.6089280049006144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,fp8,0,0.6440159877141317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,fp8,0,0.7177013556162516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,fp8,0,1.2810773054758708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,float16,0,0.347488005956014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,fp8,0,0.1714400053024292
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,float16,0,0.20855466524759927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,fp8,0,0.2051680088043213
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,float16,0,0.3267413377761841
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,float16,0,0.30238932371139526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,float16,0,0.18679465850194296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,fp8,0,0.2052853306134542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,float16,0,0.09918933113416036
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,fp8,0,0.09666132926940918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,float16,0,0.11352533102035522
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,fp8,0,0.10689600308736165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,float16,0,0.7294613520304362
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,float16,0,0.1763413349787394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,fp8,0,0.38628800710042316
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,float16,0,0.180842657883962
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,fp8,0,0.10173867146174113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,float16,0,0.06828799843788147
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,fp8,0,0.06798399984836578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,float16,0,0.08514133095741272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,fp8,0,0.09082133571306865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,float16,0,0.049957334995269775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,fp8,0,0.056074668963750206
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,float16,0,0.03925333420435587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,fp8,0,0.037578667203585304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,float16,0,0.04106666644414266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,fp8,0,0.16482133666674295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,float16,0,0.10542399684588115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,fp8,0,0.052095999320348106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,fp8,0,0.06011199951171875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,float16,0,0.04140799989302953
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,fp8,0,0.04251199960708618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,float16,0,0.03398933261632919
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,fp8,0,0.03366933266321818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,fp8,0,0.33241599798202515
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,fp8,0,0.035642666121323906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,float16,0,0.03985599925120672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,fp8,0,0.041322665909926094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,fp8,0,0.6617066860198975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,fp8,0,0.7495306332906088
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,float16,0,0.817477305730184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,fp8,0,0.8252106507619222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,fp8,0,0.04162666698296865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,float16,0,1.5224693616231282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,float16,0,0.6805333296457926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,float16,0,0.8056053320566813
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,float16,0,0.062261333068211876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,fp8,0,0.3468586603800456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,float16,0,0.417738676071167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,fp8,0,0.9257919788360596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,float16,0,0.03566399961709976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,fp8,0,0.42632532119750977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,float16,0,0.4137706756591797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,fp8,0,0.6750666300455729
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,fp8,0,0.4729280074437459
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,float16,0,0.04809066653251648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,float16,0,0.23169066508611044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,fp8,0,0.22598934173583984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,fp8,0,0.3575413227081299
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,float16,0,0.3505013386408488
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,float16,0,0.21785066525141397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,float16,0,0.10627733667691548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,fp8,0,0.2471839984258016
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,fp8,0,0.09941333532333374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,float16,0,0.12332800030708313
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,fp8,0,0.12019200126330058
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,float16,0,0.19699732462565103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,float16,0,0.19076265891393027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,fp8,0,0.1839146614074707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,float16,0,0.06390400230884552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,fp8,0,0.06121600170930227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,float16,0,0.06766400237878163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,float16,0,0.3770293394724528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,float16,0,0.09446932872136433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,fp8,0,0.10280533631642659
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,float16,0,0.05936533212661743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,float16,0,0.7506186962127686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,fp8,0,0.21267733971277872
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,fp8,0,0.040549332896868386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,fp8,0,1.58516263961792
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,float16,0,0.11970667044321696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,fp8,0,0.061850666999816895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,float16,0,0.03755199909210205
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,fp8,0,0.04029333343108495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,float16,0,0.030432000756263733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,fp8,0,0.02828799933195114
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,float16,0,0.04582933088143667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,fp8,0,0.0295413335164388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,float16,0,0.03566399961709976
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,fp8,0,0.037658666570981346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,float16,0,0.03160000095764796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,fp8,0,0.0664213349421819
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,float16,0,0.041749333341916404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,fp8,0,0.025578667720158894
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,float16,0,0.030080000559488933
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,fp8,0,0.13214932878812155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,float16,0,0.03068800022204717
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,fp8,0,0.03142400085926056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,float16,0,0.02756800005833308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,float16,0,0.5310080051422119
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,float16,0,0.02771199991305669
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,fp8,0,0.0461706668138504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,fp8,0,0.06994133194287618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,fp8,0,0.5344800154368082
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,float16,0,0.6662400166193644
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,fp8,0,0.6965386867523193
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,fp8,0,0.03356799980004629
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,float16,0,1.3185653686523438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,float16,0,0.28092799584070843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,fp8,0,1.4450987180074055
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,fp8,0,0.0276053324341774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,fp8,0,0.277893324693044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,float16,0,0.34641067186991376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,fp8,0,0.3596266508102417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,float16,0,0.37329065799713135
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,float16,0,0.6897386709849039
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,fp8,0,0.14994666973749796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,float16,0,0.18454400698343912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,fp8,0,0.19323732455571493
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,float16,0,0.34251733620961505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,float16,0,0.0568800022204717
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,float16,0,0.1957706610361735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,fp8,0,0.43689600626627606
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,float16,0,0.08513599634170532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,float16,0,0.7258400122324625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,float16,0,0.10188266634941101
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,fp8,0,0.10218133529027303
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,fp8,0,0.855135997136434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,float16,0,0.17900800704956055
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,fp8,0,0.20016533136367798
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,float16,0,0.10640533765157063
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,fp8,0,0.11961066722869873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,fp8,0,0.048885335524876915
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,float16,0,0.05618133147557577
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,float16,0,0.15056000153223673
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,fp8,0,0.058186665177345276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,float16,0,0.07901333272457123
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,fp8,0,0.09087466200192769
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,float16,0,0.03356266766786575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,fp8,0,0.0591839998960495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,fp8,0,0.033546666304270424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,float16,0,0.03661333272854487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,fp8,0,0.32230933507283527
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,float16,0,0.04851733148097992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,float16,0,0.04795200129350027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,fp8,0,0.05413866539796194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,fp8,0,0.03583999971548716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,fp8,0,0.08090133468310039
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,float16,0,0.02332266668478648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,float16,0,0.05049600203831991
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,float16,0,0.023344000180562336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,fp8,0,0.02496533344189326
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,float16,0,0.029477333029111225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,fp8,0,0.031717332700888314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,fp8,0,0.03909866760174433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,float16,0,0.02533866713444392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,fp8,0,0.027269333600997925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,fp8,0,0.2278453310330709
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,float16,0,0.030554667115211487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,fp8,0,0.025285333395004272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,fp8,0,0.023130667706330616
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,fp8,0,0.02325333406527837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,float16,0,0.02057066683967908
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,fp8,0,0.020138667275508244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,float16,0,0.02162666618824005
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,float16,0,0.019823999454577763
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,float16,0,0.021525333325068157
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,fp8,0,0.02143999934196472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,float16,0,0.021562665700912476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,float16,0,0.23719465732574463
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,fp8,0,0.6101760069529215
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,fp8,0,0.2407146692276001
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,float16,0,0.3103040059407552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,fp8,0,0.32361066341400146
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,float16,0,0.6095093488693237
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,fp8,0,0.5945119857788086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,float16,0,0.351855993270874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,fp8,0,0.4177653392155965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,float16,0,0.12965333461761475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,fp8,0,0.13201600313186646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,fp8,0,0.17679999272028604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,float16,0,0.31632000207901
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,fp8,0,0.30668799082438153
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,fp8,0,0.021642667551835377
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,float16,0,0.18574933211008707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,float16,0,0.07462933162848155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,fp8,0,0.21810134251912436
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,float16,0,0.09231467048327129
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,float16,0,0.17011199394861856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,float16,0,0.17094933986663818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,fp8,0,0.19083199898401895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,float16,0,0.09910933176676433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,fp8,0,0.11329066753387451
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,float16,0,0.04171200096607208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,fp8,0,0.043434664607048035
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,fp8,0,0.051967998345692955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,float16,0,0.07452266911665599
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,fp8,0,0.07077333331108093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,float16,0,0.04585599899291992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,fp8,0,0.08750399947166443
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,float16,0,0.02942399928967158
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,fp8,0,0.029472000896930695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,float16,0,0.032826667030652366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,fp8,0,0.03568533311287562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,float16,0,0.043680002291997276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,float16,0,0.02232533444960912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,fp8,0,0.04996799925963084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,float16,0,0.02390933285156886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,float16,0,0.01926933353145917
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,fp8,0,0.020234666764736176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,float16,0,0.021253332495689392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,fp8,0,0.021498667697111767
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,float16,0,0.025994665920734406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,fp8,0,0.056277334690093994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,float16,0,0.02125866711139679
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,float16,0,0.017263999829689663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,fp8,0,0.020128000527620316
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,float16,0,0.01893866683046023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,fp8,0,0.033002667129039764
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,fp8,0,0.019253333409627277
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,fp8,0,0.08374399940172832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,float16,0,0.019226666539907455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,fp8,0,0.029461334149042766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,float16,0,0.017077332983414333
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,fp8,0,0.024421334266662598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,fp8,0,0.017082666357358296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,float16,0,0.019317333896954853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,fp8,0,0.019066666563351948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,float16,0,0.017338667064905167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,fp8,0,0.017423999806245167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,float16,0,0.02128000060717265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,fp8,0,0.023242667317390442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,fp8,0,0.017194667210181553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,float16,0,0.01725333308180173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,float16,0,0.01709866647919019
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,float16,0,0.01718933383623759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,float16,0,0.14301333824793497
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,fp8,0,0.1446346640586853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,float16,0,0.18515199422836304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,float16,0,0.01730666682124138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,fp8,0,0.017008000363906223
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,fp8,0,0.1901866594950358
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,float16,0,0.32099199295043945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,fp8,0,0.32549866040547687
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,float16,0,0.18906132380167642
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,fp8,0,0.22389866908391318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,float16,0,0.08021333316961925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,fp8,0,0.07662400106589
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,fp8,0,0.09928533434867859
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,float16,0,0.1755733291308085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,fp8,0,0.017258666455745697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,fp8,0,0.19548799594243368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,float16,0,0.10146666566530864
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,fp8,0,0.1172106663386027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,float16,0,0.043893332282702126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,fp8,0,0.04630400240421295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,float16,0,0.05193600058555603
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,float16,0,0.07994133234024048
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,float16,0,0.02741333345572154
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,float16,0,0.04399466514587402
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,fp8,0,0.054010664423306785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,float16,0,0.02743999908367793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,float16,0,0.09659733374913533
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,float16,0,0.031285333136717476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,float16,0,0.04190400242805481
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,fp8,0,0.049285332361857094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,float16,0,0.027493332823117573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,fp8,0,0.03382933388153712
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,float16,0,0.021429332594076794
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,fp8,0,0.0561653325955073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,float16,0,0.047877331574757896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,float16,0,0.027471999327341717
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,fp8,0,0.027552001178264618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,float16,0,0.019173332800467808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,fp8,0,0.033530667424201965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,fp8,0,0.014287999520699183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,float16,0,0.01921066641807556
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,float16,0,0.015072000523408255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,fp8,0,0.016399999459584553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,fp8,0,0.019120000302791595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,fp8,0,0.08678932984670003
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,fp8,0,0.03070399910211563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,float16,0,0.012949333836634954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,fp8,0,0.021242665747801464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,float16,0,0.013264000415802002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,float16,0,0.015098666151364645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,fp8,0,0.021162666380405426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,float16,0,0.013189333180586496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,fp8,0,0.02332799881696701
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,fp8,0,0.013280000537633896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,float16,0,0.015295999745527903
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,float16,0,0.01322666679819425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,float16,0,0.013258667041858038
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,float16,0,0.01523200049996376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,float16,0,0.012981332838535309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,float16,0,0.017184000462293625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,float16,0,0.012991999586423239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,fp8,0,0.014165333161751429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,float16,0,0.012960000584522883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,fp8,0,0.013269333789745966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,float16,0,0.10744000474611919
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,fp8,0,0.10321066776911418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,fp8,0,0.013194666554530462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,float16,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,float16,0,0.20153599977493286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,fp8,0,0.22129066785176596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,float16,0,0.11433066924413045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,float16,0,0.05820266902446747
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,float16,0,0.12355732917785645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,float16,0,0.06320533156394958
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,fp8,0,0.12560000022252402
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,fp8,0,0.06798933446407318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,float16,0,0.09912533561388652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,fp8,0,0.09937600294748943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,float16,0,0.05343999962011973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,fp8,0,0.12787200013796488
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,fp8,0,0.01351999988158544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,float16,0,0.03515733281771342
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,fp8,0,0.05836800237496694
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,fp8,0,0.039818666875362396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,float16,0,0.04942933221658071
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,fp8,0,0.05596800148487091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,float16,0,0.029525332152843475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,float16,0,0.02124800036350886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,fp8,0,0.023344000180562336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,fp8,0,0.02513599892457326
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,fp8,0,0.060138667623202004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,float16,0,0.02958400050799052
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,fp8,0,0.032416000962257385
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,float16,0,0.021301334102948506
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,float16,0,0.017136000096797943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,fp8,0,0.017103999853134155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,float16,0,0.01736533393462499
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,fp8,0,0.035631999373435974
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,float16,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,float16,0,0.019776000330845516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,float16,0,0.02334933231274287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,float16,0,0.01515199989080429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,float16,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,fp8,0,0.03547733277082443
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,fp8,0,0.015279999623696009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,fp8,0,0.023237332701683044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,fp8,0,0.01611199975013733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,float16,0,0.013082666943470636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,float16,0,0.013066666821638743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,float16,0,0.03782933453718821
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,float16,0,0.012975999464591345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,fp8,0,0.013141332815090815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,float16,0,0.012949333836634954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,float16,0,0.01509333277742068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,float16,0,0.011429333438475927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,float16,0,0.012293333808581034
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,float16,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,float16,0,0.011930666863918304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,fp8,0,0.023434666295846302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,fp8,0,0.011685332904259363
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,fp8,0,0.011946666985750198
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,fp8,0,0.013237333546082178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,fp8,0,0.0195573332409064
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,float16,0,0.012954667210578918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,float16,0,0.0855519970258077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,float16,0,0.01118933285276095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,float16,0,0.012671999633312225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,float16,0,0.011205332974592844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,float16,0,0.07639466722806294
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,fp8,0,0.084714670976003
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,fp8,0,0.07251733541488647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,float16,0,0.04805333415667216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,fp8,0,0.013141332815090815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,fp8,0,0.047797332207361855
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,float16,0,0.051962668697039284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,fp8,0,0.05333866675694784
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,float16,0,0.1363200048605601
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,fp8,0,0.0683840016523997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,float16,0,0.0377813329299291
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,float16,0,0.029680001238981884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,fp8,0,0.0424586683511734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,fp8,0,0.029578665892283123
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,float16,0,0.03150933235883713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,fp8,0,0.031504000226656594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,fp8,0,0.09243200222651164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,float16,0,0.03710933278004328
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,float16,0,0.023189333577950794
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,float16,0,0.06358399987220764
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,fp8,0,0.12388267119725545
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,fp8,0,0.019194666296243668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,float16,0,0.023306667804718018
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,fp8,0,0.025477332373460133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,float16,0,0.01714133347074191
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,fp8,0,0.03949866692225138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,fp8,0,0.018842666099468868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,fp8,0,0.025455998877684276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,float16,0,0.019173332800467808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,float16,0,0.021114667256673176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,float16,0,0.015237333873907724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,float16,0,0.017077332983414333
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,fp8,0,0.017344000438849132
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,float16,0,0.013002666334311167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,float16,0,0.012159999459981918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,float16,0,0.012037333101034164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,float16,0,0.011535999675591787
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,float16,0,0.01492799942692121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,float16,0,0.011338666081428528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,fp8,0,0.021375998854637146
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,fp8,0,0.011157333850860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,fp8,0,0.013269333789745966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,float16,0,0.011285333583752314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,fp8,0,0.011866666376590729
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,float16,0,0.011264000087976456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,float16,0,0.010922666639089584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,float16,0,0.011157333850860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,float16,0,0.09107733766237895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,fp8,0,0.01201066623131434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,fp8,0,0.01192533348997434
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,float16,0,0.01108266661564509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,float16,0,0.08091199894746144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,float16,0,0.011002667248249054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,float16,0,0.011045332998037338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,float16,0,0.010944000134865442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,float16,0,0.08550399541854858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,float16,0,0.09684266646703084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,fp8,0,0.09482666850090027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,float16,0,0.05499733487764994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,fp8,0,0.07450133562088013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,float16,0,0.011109333485364914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,fp8,0,0.04365866879622141
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,fp8,0,0.04582933088143667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,float16,0,0.052239999175071716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,fp8,0,0.052000001072883606
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,float16,0,0.03243733445803324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,fp8,0,0.08016000191370647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,float16,0,0.027647999425729115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,fp8,0,0.028405333558718365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,float16,0,0.031471999982992806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,float16,0,0.045706664522488914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,fp8,0,0.03147733211517334
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,float16,0,0.02096533278624217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,float16,0,0.019146667172511418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,float16,0,0.047983999053637184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,fp8,0,0.017162666966517765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,fp8,0,0.019194666296243668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,fp8,0,0.03356266766786575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,fp8,0,0.02107733239730199
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,float16,0,0.029450667401154835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,float16,0,0.015109332899252573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,float16,0,0.015109332899252573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,fp8,0,0.015103999525308609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,fp8,0,0.05551466842492422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,float16,0,0.014997333288192749
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,fp8,0,0.015184000134468079
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,float16,0,0.01522133375207583
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,fp8,0,0.015237333873907724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,float16,0,0.011312000453472137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,fp8,0,0.012319999436537424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,float16,0,0.019466667125622433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,float16,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,fp8,0,0.02735466758410136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,float16,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,fp8,0,0.015295999745527903
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,float16,0,0.01128000020980835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,fp8,0,0.021141332884629566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,fp8,0,0.011157333850860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,float16,0,0.011215999722480774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,float16,0,0.019237333287795384
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,float16,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,float16,0,0.011178666104873022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,float16,0,0.011141333729028702
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,float16,0,0.011034666250149408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,float16,0,0.011237333218256632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,fp8,0,0.011152000476916632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,float16,0,0.010960000256697336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,float16,0,0.01128000020980835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,0,0.012997332960367203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,float16,0,0.011407999942700068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,0,0.019317333896954853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,float16,0,0.025279998779296875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,float16,0,0.010965333630641302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,0,0.011071999867757162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,0,0.012133333832025528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,0,0.01321600005030632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,float16,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,float16,0,0.01714666684468587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,fp8,0,0.015439999600251516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,0,0.008986666798591614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,fp8,0,0.021520001192887623
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,0,0.009098666409651438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,0,0.011402666568756104
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,float16,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,0,0.009216000015536943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,0,0.009050666665037474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,0,0.0100853331387043
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,0,0.011034666250149408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,0,0.010485333700974783
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,0,0.015157333264748255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,0,0.009557333464423815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,0,0.00919999989370505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,0,0.009370666618148485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,float16,0,0.011226666470368704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,0,0.00921066664159298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,float16,0,0.009189333145817121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,0,0.008943999807039896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,0,0.009258666386206945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,0,0.009152000149091085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,0,0.009775999933481216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,float16,0,0.00892800030608972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,0,0.009989333028594652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,0,0.008938666433095932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,0,0.008874666566650072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,0,0.009136000027259191
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,0,0.00898133342464765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,fp8,0,0.009232000137368837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,0,0.00901333304742972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,0,0.009008000294367472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,0,0.010863999525705973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,0,0.009141333401203156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,0,0.009162666896979014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,float16,0,0.009226666763424873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,0,0.009589333087205887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,float16,0,0.009237333511312803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,0,0.009408000235756239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,0,0.008986666798591614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,0,0.009232000137368837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,0,0.009039999917149544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,fp8,0,0.009119999905427298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,float16,0,3.5917174021402993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,float16,0,3.7465438842773438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,0,0.009237333511312803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,0,0.011146667102972666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,fp8,0,3.0870612462361655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,float16,0,2.273973306020101
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,float16,0,4.484810511271159
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,float16,0,1.8372693061828613
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,1.9926293690999348
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,fp8,0,4.054767926534017
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,fp8,0,2.920069376627604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,fp8,0,1.5401172637939453
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,float16,0,1.9286719957987468
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,fp8,0,1.6986346244812012
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,fp8,0,1.922602653503418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,float16,0,2.2422080039978027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,0,0.009466666728258133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,float16,0,0.9807039896647135
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,fp8,0,0.8496426741282145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,fp8,0,0.8736159801483154
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,fp8,0,1.0382346312204997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,float16,0,2.2834134101867676
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,2.00325870513916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,float16,0,1.1808319886525471
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,1.0573386351267497
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,1.0663519700368245
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,float16,0,0.6420053243637085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,float16,0,0.5398346583048502
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,float16,0,1.0150400002797444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,fp8,0,0.49369601408640545
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,float16,0,0.6437706549962362
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,fp8,0,0.5797173182169596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,float16,0,1.1654880046844482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,0.580074667930603
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,float16,0,0.6409653425216675
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,0,0.009237333511312803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,float16,0,2.1075092951456704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,fp8,0,0.4744960069656372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,fp8,0,1.8585119247436523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,float16,0,0.5600693225860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,float16,0,1.192138671875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,float16,0,1.3900480270385742
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,float16,0,1.1220746835072835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,fp8,0,0.9135039647420248
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,float16,0,1.1700639724731445
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,fp8,0,0.9806826909383138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,float16,0,2.186469395955404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,float16,0,1.383354663848877
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,fp8,0,1.1734506289164226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,float16,0,1.346186637878418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,fp8,0,2.2242453893025718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,float16,0,0.7232267061869303
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,float16,0,0.5867040157318115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,fp8,0,0.5023306608200073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,fp8,0,1.7336479822794597
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,float16,0,0.6133013168970743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,fp8,0,0.5412533283233643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,float16,0,0.7230079968770345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,fp8,0,0.6539520025253296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,float16,0,2.6988372802734375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,1.2513173421223958
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,float16,0,0.7220160166422526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,0.6588639815648397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,0.6577706734339396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,float16,0,0.33577601114908856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,fp8,0,0.3055093288421631
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,float16,0,0.34462932745615643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,0.5793439944585165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,fp8,0,0.3707573413848877
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,float16,0,0.40484801928202313
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,0.38384532928466797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,float16,0,1.5292852719624836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,fp8,0,1.2462560335795085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,float16,0,1.5756905873616536
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,float16,0,0.40564266840616864
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,0.37935467561086017
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,fp8,0,1.3535040219624836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,float16,0,1.9620854059855144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,fp8,0,0.31414933999379474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,float16,0,0.8007733027140299
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,fp8,0,0.6677707036336263
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,float16,0,0.4081653356552124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,float16,0,0.8315680027008057
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,float16,0,1.0212480227152507
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,fp8,0,0.9160586992899576
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,1.2857120037078857
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,float16,0,1.0050506591796875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,float16,0,0.5377653439839681
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,0.9253439903259277
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,0.5071039994557699
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,float16,0,0.4485280116399129
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,fp8,0,1.756432056427002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,0.9452373186747233
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,float16,0,0.5437066555023193
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,fp8,0,0.49852800369262695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,fp8,0,0.7220266660054525
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,0.5045013427734375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,0.29235732555389404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,float16,0,0.32237333059310913
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,float16,0,0.25788267453511554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,float16,0,0.26902933915456134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,fp8,0,0.24597867329915366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,float16,0,0.4268053372701009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,fp8,0,0.2868799964586894
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,fp8,0,0.3784000078837077
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,float16,0,0.32099733750025433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,float16,0,1.007962703704834
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,float16,0,1.934874693552653
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,fp8,0,1.6221706072489421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,float16,0,2.093029340108236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,fp8,0,0.2319200038909912
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,fp8,0,1.7932693163553874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,float16,0,0.29897065957387287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,float16,0,2.757205327351888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,fp8,0,0.4009173313776652
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,float16,0,1.4301120440165203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,1.3270506858825684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,fp8,0,0.8489440282185873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,float16,0,1.0999786853790283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,fp8,0,0.9295093218485514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,float16,0,1.3996373812357585
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,fp8,0,1.2146613597869873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,float16,0,1.3621226946512859
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,float16,0,0.539242664972941
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,float16,0,1.002794663111369
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,fp8,0,0.46107200781504315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,float16,0,0.5662186543146769
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,float16,0,0.5446613232294718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,fp8,0,0.5047359863917033
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,fp8,0,0.6399786472320557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,float16,0,0.7219680150349935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,0.7104000250498453
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,float16,0,0.4046613375345866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,fp8,0,2.620709260304769
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,0.37003199259440106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,float16,0,0.2957386573155721
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,float16,0,0.30870399872461957
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,fp8,0,0.27966399987538654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,float16,0,0.39866665999094647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,float16,0,0.7176053524017334
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,fp8,0,0.3698933521906535
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,float16,0,0.3896533250808716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,0.3777173360188802
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,float16,0,0.23535466194152832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,float16,0,0.18803733587265015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,0.29842134316762287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,float16,0,0.19332265853881836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,fp8,0,0.18254933754603067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,float16,0,0.2174826661745707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,fp8,0,0.2136426568031311
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,float16,0,0.23120532433191934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,0.21434134244918823
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,float16,0,0.7372907002766927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,0.21596266825993857
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,fp8,0,0.17176532745361328
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,float16,0,1.2694026629130046
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,fp8,0,0.2656373381614685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,fp8,0,1.1126240094502766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,float16,0,1.7668906847635906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,fp8,0,1.501530647277832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,float16,0,0.8769813378651937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,1.3275146484375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,float16,0,0.6004586617151896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,fp8,0,0.521776000658671
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,float16,0,0.6665493249893188
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,fp8,0,0.5837119817733765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,float16,0,1.1567573547363281
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,float16,0,0.8785706361134847
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,0.8621226946512858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,0.44846399625142414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,fp8,0,0.2860533396402995
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,0.667184034983317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,float16,0,0.35423465569814044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,0.8632853031158447
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,fp8,0,0.3221919933954875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,float16,0,0.4792160193125407
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,fp8,0,0.4511733452479045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,float16,0,0.45902399222056073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,0.447002649307251
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,float16,0,0.2571306626001994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,float16,0,0.46214401721954346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,0.2504853407541911
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,float16,0,0.3280906677246094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,fp8,0,0.1862186590830485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,float16,0,0.2568426728248596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,fp8,0,0.22985599438349405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,0.24888533353805542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,fp8,0,0.981706698735555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,0.14655466874440512
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,float16,0,0.1888213356335958
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,float16,0,0.12570666273434958
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,fp8,0,0.1727679967880249
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,fp8,0,0.11398933331171672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,float16,0,0.1304426689942678
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,fp8,0,0.12092266480127971
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,fp8,0,0.14435733358065286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,float16,0,0.14686399698257446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,float16,0,0.9089546998341879
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,float16,0,1.1270986398061116
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,fp8,0,0.9583253065745035
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,float16,0,0.19735999902089438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,float16,0,1.2605013052622478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,float16,0,0.1474666694800059
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,fp8,0,1.1302506923675537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,0.14648000399271646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,fp8,0,1.8012213706970215
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,0.8993173440297445
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,fp8,0,0.7762026786804199
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,float16,0,0.5689173142115275
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,float16,0,0.6408106486002604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,fp8,0,0.5921226739883423
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,float16,0,0.9803840319315592
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,fp8,0,0.8746773401896158
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,float16,0,0.9066932996114095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,0.9759199619293213
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,float16,0,0.4874773422876994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,0.47740264733632404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,float16,0,0.3062559962272644
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,float16,0,1.0042986869812012
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,float16,0,0.3445119857788086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,fp8,0,0.32181866963704425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,fp8,0,0.5006133317947388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,float16,0,0.48926933606465656
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,float16,0,0.5130026737848917
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,float16,0,0.2792639931042989
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,0.25941866636276245
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,float16,0,0.16926934321721396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,fp8,0,0.15919466813405356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,float16,0,0.18917866547902426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,float16,0,0.2605973283449809
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,fp8,0,0.17332265774408975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,fp8,0,0.275546669960022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,float16,0,0.14680533607800803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,float16,0,0.2672160069147746
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,0.2595840096473694
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,float16,0,0.1521813372770945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.14416533708572388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,float16,0,0.1104746659596761
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,fp8,0,0.10273067156473796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,float16,0,0.11745599905649821
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,float16,0,0.14064533511797586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,fp8,0,0.14314132928848267
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.14266133308410645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,float16,0,1.8844960530598958
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,float16,0,0.27313599983851117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,float16,0,0.08458667000134786
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.0867199997107188
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,fp8,0,0.2616960008939107
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,fp8,0,0.06411199768384297
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,fp8,0,0.06870933373769124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,0.47813868522644043
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,fp8,0,0.11123200257619222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,fp8,0,0.08479467034339905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,float16,0,0.08489066362380981
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.086709330479304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,float16,0,0.1532799998919169
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,float16,0,0.6776159604390463
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,fp8,0,0.6043573220570883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,float16,0,0.7834186553955078
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,float16,0,0.07235200206438701
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,fp8,0,0.4756053288777669
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,fp8,0,1.1033226648966472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,float16,0,0.6128160158793131
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,float16,0,1.271135965983073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,float16,0,0.352512001991272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,fp8,0,0.32423466444015503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,float16,0,0.4036266803741455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,fp8,0,0.38661332925160724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,float16,0,0.6201173464457194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,fp8,0,0.5774133205413818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,float16,0,0.08371200164159139
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,0.615941325823466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,0.339354674021403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,float16,0,0.20130133628845215
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,fp8,0,0.1758613387743632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,float16,0,0.22265599171320596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,0.6753333409627279
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,fp8,0,0.21364800135294595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,fp8,0,0.3328426678975423
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,float16,0,0.33602134386698407
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,0.3390186627705892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,0.18357867002487183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,float16,0,0.11551466584205627
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,fp8,0,0.7342613538106283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,float16,0,0.12361600001653035
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,fp8,0,0.11966400345166524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,float16,0,0.17249600092569986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,fp8,0,0.1730453372001648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,float16,0,0.18567466735839844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,0.18386665980021158
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,float16,0,0.3290026585261027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,float16,0,0.09693866968154907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.1032533347606659
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,float16,0,0.18255466222763062
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,float16,0,0.08002666632334392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,fp8,0,0.07679999868075053
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,float16,0,0.09749333063761394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,float16,0,0.658517320950826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,float16,0,0.09846400221188863
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,float16,0,0.06667733192443848
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.06779733300209045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,float16,0,0.05517866710821787
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,float16,0,0.07483200232187907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,fp8,0,0.05215999980767568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,float16,0,0.05640000104904175
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,fp8,0,0.10717866818110149
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,fp8,0,0.055215999484062195
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,float16,0,0.06436799963315327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,float16,0,0.3476159969965617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.06833066542943318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,float16,0,0.06648533542950948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,float16,0,0.6944693724314371
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,fp8,0,0.623093326886495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,float16,0,0.813157320022583
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,fp8,0,0.06966933111349742
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,float16,0,1.5139360427856445
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,fp8,0,0.10017066200574239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,fp8,0,0.06624533236026764
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,fp8,0,1.565450668334961
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.10290132959683736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,fp8,0,0.33105067412058514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,float16,0,0.4193013509114583
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,fp8,0,0.4109119971593221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,float16,0,0.7524267037709554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,fp8,0,0.7015573183695475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,float16,0,0.0684853345155716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,0.7178826332092285
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,float16,0,0.3749813238779704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,0.38179198900858563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,float16,0,0.19108800093332926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,float16,0,0.23308799664179483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,float16,0,0.3573919932047526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,float16,0,0.37304532527923584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,fp8,0,0.35067200660705566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,float16,0,0.3869813283284505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,fp8,0,0.7870079676310221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,float16,0,0.6892906824747721
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,float16,0,0.1959786613782247
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,0.203274667263031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,float16,0,0.10657067100207011
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,fp8,0,0.17914666732152304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,fp8,0,0.1160640021165212
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,float16,0,0.21025600035985312
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,fp8,0,0.19794134298960367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,float16,0,0.203439990679423
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,0.20992000897725424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,float16,0,0.11090667049090068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,0.42457600434621173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,float16,0,0.06856533388296764
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,fp8,0,0.06428800026575725
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,float16,0,0.07459733386834462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,float16,0,0.7697013219197592
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,fp8,0,0.10101866722106934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,float16,0,0.1237493356068929
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,fp8,0,0.22051199277242026
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,float16,0,0.11242666840553284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.1081813375155131
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,float16,0,0.058330665032068886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.10617066423098247
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,float16,0,0.04288533329963684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,fp8,0,0.04005866746107737
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,fp8,0,0.045824001232783
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,fp8,0,0.07455466687679291
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,float16,0,0.057664001981417336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,float16,0,0.09854400157928467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,0.7184480031331381
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.06257066627343495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,float16,0,0.045893331368764244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.04789866507053375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,float16,0,0.03982399900754293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,float16,0,0.04066666712363561
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,fp8,0,0.03973866750796636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,float16,0,0.04585599899291992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,float16,0,0.0460746685663859
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.04799999793370565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,fp8,0,0.10618133346239726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,float16,0,0.43935465812683105
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,float16,0,0.0581279993057251
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,fp8,0,0.4143253167470296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,float16,0,0.5491573413213094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,fp8,0,0.5378559827804565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,fp8,0,0.04585599899291992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,float16,0,0.04613866905371348
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,float16,0,0.5346239805221558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,0.5648373365402222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,float16,0,0.23722134033838907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,fp8,0,0.21971199909845987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,fp8,0,0.28644800186157227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,float16,0,0.49984534581502277
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.0641653339068095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,fp8,0,0.5886826515197754
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,float16,0,1.0483307043711345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,float16,0,0.2694080074628194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,fp8,0,1.0006399949391682
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,float16,0,0.13131200273831686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,fp8,0,0.11731732885042827
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,fp8,0,0.06168533364931742
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,float16,0,0.1606933375199636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,float16,0,0.28273600339889526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,float16,0,0.2691146731376648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,fp8,0,0.2839893301328023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,float16,0,0.26315200328826904
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,0.2737013300259908
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,float16,0,0.1402400036652883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.14853333433469137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,fp8,0,0.07133333384990692
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,float16,0,0.08752000331878662
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,fp8,0,0.08480532964070638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,0.27034133672714233
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,float16,0,0.13993066549301147
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,float16,0,0.14061333735783896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,fp8,0,0.1528320014476776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.14891733725865683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,float16,0,0.07272533575693767
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,float16,0,0.04849066833655039
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,fp8,0,0.03753600021203359
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,fp8,0,0.04738666613896688
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,fp8,0,0.05398400127887726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,float16,0,0.07070399820804596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,0.5077653328577677
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,float16,0,0.07144533097743988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,float16,0,0.045706664522488914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.04987733562787374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,float16,0,0.03562133262554804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.08065066734949748
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,fp8,0,0.03350933392842611
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,float16,0,0.05399466554323832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,float16,0,0.03775466730197271
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,fp8,0,0.03760000069936117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,fp8,0,0.04804799954096476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,float16,0,0.5343466599782308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,float16,0,0.04610133171081543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,fp8,0,0.07712000111738841
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.03967999915281931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,fp8,0,0.133050670226415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,fp8,0,0.03140799949566523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,fp8,0,0.033610666791598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,float16,0,0.037621334195137024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,fp8,0,0.037589333951473236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,float16,0,0.07445333401362102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,float16,0,0.03750933210055033
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.03974399964014689
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.05008000135421753
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.07990933458010356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,float16,0,0.03181333343187968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,float16,0,0.47939733664194745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,float16,0,0.033610666791598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,fp8,0,0.45422399044036865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,float16,0,0.6096959908803304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,fp8,0,0.6234613259633383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,float16,0,0.625711997350057
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,float16,0,0.2480000058809916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,float16,0,0.04572266836961111
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,fp8,0,0.24431467056274414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,float16,0,0.32369067271550495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,fp8,0,0.3213919997215271
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,float16,0,0.6060959895451864
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,fp8,0,0.7283306916554769
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,float16,0,0.5825813213984171
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,0.7063199679056803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,0.32975467046101886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,float16,0,0.34116268157958984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,0.6269226471583048
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,float16,0,0.14060800274213156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,fp8,0,0.13079999883969626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,float16,0,0.1742560068766276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,fp8,0,0.17187732458114624
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,float16,0,0.037791999677817024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,float16,0,0.31543999910354614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,float16,0,0.34111467997233075
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,float16,0,0.07283199826876323
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,fp8,0,0.07252266506354015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,float16,0,0.08893866340319316
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,fp8,0,0.08749333024024963
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,float16,0,0.16491732994715372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,fp8,0,0.17912532885869345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,float16,0,0.17068799336751303
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.1725013256072998
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,float16,0,0.0883840024471283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.09001066287358601
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,float16,0,0.16895999511082968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,float16,0,0.048010667165120445
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,float16,0,0.05467733244101206
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,float16,0,0.07653333246707916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,fp8,0,0.08709333340326945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,float16,0,0.08917333682378133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,fp8,0,0.30406399567921955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.09056533376375835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.051813334226608276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,0.33297065893809
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,float16,0,0.029498666524887085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,float16,0,0.033439998825391136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,fp8,0,0.033573334415753685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,float16,0,1.2794880072275798
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,float16,0,0.0439573327700297
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,fp8,0,0.050000001986821495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,float16,0,0.04518933097521464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.17270932594935098
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.05188799897829691
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,fp8,0,0.05611200133959452
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,float16,0,0.027295999228954315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,fp8,0,0.025653332471847534
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,float16,0,0.027952000498771667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,fp8,0,1.3550079663594563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,fp8,0,0.027376001079877216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,float16,0,0.033615998923778534
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,fp8,0,0.03550933301448822
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.03726933399836222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,float16,0,0.029258665939172108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,fp8,0,0.04614399870236715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,float16,0,0.0458133320013682
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,fp8,0,0.023610666394233704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,float16,0,0.02532800038655599
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,float16,0,0.02569066733121872
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,float16,0,0.028618666032950085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,fp8,0,0.029535998900731403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,float16,0,0.029359998802344005
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.02940800040960312
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.029696000119050343
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,float16,0,0.37758398056030273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,fp8,0,0.3681866725285848
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,fp8,0,0.0295413335164388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,fp8,0,0.025557334224383037
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,float16,0,0.03366400053103765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,float16,0,0.5030773480733236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,fp8,0,0.5315786600112915
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,float16,0,1.1929173469543457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,float16,0,0.5713226795196533
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,float16,0,0.19795199235280356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,fp8,0,0.19799466927846274
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,0.6559360027313232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,fp8,0,1.2952533562978108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,float16,0,0.26398932933807373
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,fp8,0,0.28006933132807416
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,float16,0,0.6071786483128866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,fp8,0,0.5569119850794474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,float16,0,0.2795093258221944
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.3069760004679362
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,float16,0,0.1088853379090627
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,float16,0,0.1420693298180898
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,float16,0,0.033520000676314034
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,float16,0,0.2956533432006836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,fp8,0,0.3076159954071045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,float16,0,0.3173866669336955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,float16,0,0.14959999918937683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,float16,0,0.058186665177345276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,fp8,0,0.10628799597422282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,float16,0,0.07748800019423167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,fp8,0,0.07067733506361644
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,float16,0,0.15981333454449972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,float16,0,0.5722399950027466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,0.5874346494674683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,fp8,0,0.16425066192944845
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.03623999903599421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,float16,0,0.15249600013097128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.08417066931724548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.3482346534729004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,float16,0,0.03700266778469086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,float16,0,0.04390933116277059
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.1607093314329783
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,fp8,0,0.046021332343419395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,fp8,0,0.05805333455403646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,float16,0,0.06655466556549072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,fp8,0,0.07865066826343536
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,fp8,0,0.15101333459218344
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,float16,0,0.08538666367530823
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.0794293334086736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,float16,0,0.08294400076071422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,fp8,0,0.023306667804718018
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,float16,0,0.025733334322770435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,fp8,0,0.029338667790095013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,float16,0,0.03754666695992152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,fp8,0,0.0376800000667572
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.045909335215886436
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.030853333572546642
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,float16,0,0.01916266605257988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.1588479975859324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,fp8,0,0.01998399943113327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,float16,0,0.03781333317359289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,fp8,0,0.021253332495689392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.04580800235271454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,fp8,0,0.029418667157491047
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,float16,0,0.027386667827765148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,fp8,0,0.04378666480382284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.03129599988460541
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,float16,0,0.022272000710169475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.02359466751416524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,float16,0,0.019050666441520054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,float16,0,0.027466667195161183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,float16,0,0.01915733392039935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,float16,0,0.021344001094500225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,fp8,0,0.023306667804718018
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,float16,0,0.023018665611743927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,float16,0,0.023226665953795116
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,float16,0,0.02754133443037669
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,float16,0,0.018112000077962875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,float16,0,0.03794133414824804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,fp8,0,0.017674667139848072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,float16,0,0.017893332988023758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,float16,0,0.01934933289885521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.019706666469573975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.02349333216746648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,float16,0,0.021344001094500225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.02086399992307027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,float16,0,0.1671733260154724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,fp8,0,0.1720693310101827
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,fp8,0,0.017637333522240322
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,fp8,0,0.020213333268960316
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,float16,0,0.24275734027226767
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,float16,0,0.019466667125622433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,fp8,0,0.24973867336908975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,float16,0,0.300165335337321
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,float16,0,0.09147199988365173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.28884265820185345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,float16,0,0.019578666736682255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,float16,0,0.13315199812253317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,float16,0,0.28781867027282715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,float16,0,0.26765867074330646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.3260586659113566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,float16,0,0.13826133807500204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.15266666809717813
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,fp8,0,0.6477386554082235
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,fp8,0,0.05151999990145365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,float16,0,0.06867200136184692
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,fp8,0,0.0904960036277771
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,float16,0,0.15266133348147073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,fp8,0,0.06445333361625671
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,fp8,0,0.13657066226005554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,fp8,0,0.13195199767748514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,float16,0,0.0786293347676595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.15096533298492432
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,float16,0,0.029520000020662945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,fp8,0,0.03152533372243246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,float16,0,0.03763733307520548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,fp8,0,0.04038399954636892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,float16,0,0.050000001986821495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,float16,0,0.07832000156243642
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.07317333420117696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,float16,0,0.03573866685231527
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,float16,0,0.13937066992123923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,float16,0,0.019333332777023315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.07468800246715546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,fp8,0,0.019498666127522785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,float16,0,0.03372266640265783
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,float16,0,0.5650026798248291
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,fp8,0,0.04135466615358988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,fp8,0,0.07257066667079926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.041946664452552795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,float16,0,0.025301332275072735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.027669332921504974
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.04204266766707102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,float16,0,0.01721599946419398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,fp8,0,0.017162666966517765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,float16,0,0.02332799881696701
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,float16,0,0.06012799839178721
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,float16,0,0.02532800038655599
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.0273333340883255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,fp8,0,0.2659839987754822
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,float16,0,0.019120000302791595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,float16,0,0.023370665808518726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.021183999876181286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,fp8,0,0.025301332275072735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,float16,0,0.01616000011563301
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,fp8,0,0.021301334102948506
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,fp8,0,0.027429332335789997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,float16,0,0.019146667172511418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.02125866711139679
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,float16,0,0.03569599986076355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,float16,0,0.01498666654030482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,fp8,0,0.015237333873907724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,float16,0,0.01711999997496605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,float16,0,0.019173332800467808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,fp8,0,0.017301333447297413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,float16,0,0.015168000012636185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,fp8,0,0.015301333119471868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,float16,0,0.01515199989080429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,float16,0,0.015098666151364645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,float16,0,0.014970666418472925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,float16,0,0.015205333630243937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,float16,0,0.01711999997496605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,float16,0,0.01515199989080429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.014981333166360855
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,float16,0,0.017162666966517765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,float16,0,0.10220266381899516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,fp8,0,0.10153599580128987
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,float16,0,0.1339040001233419
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,float16,0,0.019178666174411774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,float16,0,0.1436853309472402
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,fp8,0,0.147189329067866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.15335466464360556
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,float16,0,0.05299733579158783
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,float16,0,0.07234666744867961
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,fp8,0,0.06866666674613953
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,float16,0,0.15363199512163797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,fp8,0,0.15626666943232217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,float16,0,0.13916266957918802
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,float16,0,0.08026133477687836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.0751093327999115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,float16,0,0.03246400008598963
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,fp8,0,0.03356799980004629
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,float16,0,0.03965866565704346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,fp8,0,0.043680002291997276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,float16,0,0.062261333068211876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,fp8,0,0.054005334774653115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,float16,0,0.07976533472537994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,float16,0,0.0355679988861084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.15714133779207864
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,float16,0,0.020901332298914593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,float16,0,0.014885333677132925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,float16,0,0.023520000278949738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,float16,0,0.2887679934501648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,fp8,0,0.02741333345572154
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,fp8,0,0.07442133128643036
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,fp8,0,0.041984001795450844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,float16,0,0.035504000882307686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.07493333518505096
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.025279998779296875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,float16,0,0.015130666395028433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,float16,0,0.016442666451136272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,fp8,0,0.02141333371400833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,fp8,0,0.025290665527184803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,fp8,0,0.2784000039100647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,float16,0,0.03543466577927271
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.025237334271272022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,float16,0,0.01729600007335345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,float16,0,0.01309866706530253
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.043663998444875084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,float16,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,fp8,0,0.015114666273196539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,fp8,0,0.019141333798567455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,float16,0,0.021216000119845074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.019098666807015736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,float16,0,0.02141333371400833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.019093333433071773
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.015247999380032221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,float16,0,0.012986666212479273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,float16,0,0.02162133405605952
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,float16,0,0.012842666357755661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,float16,0,0.013162666310866674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,float16,0,0.013264000415802002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.015205333630243937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,float16,0,0.01704000060757001
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,float16,0,0.012853333105643591
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,float16,0,0.011962667107582092
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,fp8,0,0.012639999389648438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,float16,0,0.013669333110253016
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,float16,0,0.012229333321253458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,float16,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,float16,0,0.017130666722853977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.04374399781227112
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,float16,0,0.01137599969903628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,float16,0,0.012053333222866058
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.013269333789745966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,float16,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,float16,0,0.011994666109482447
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,float16,0,0.011813333878914515
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,float16,0,0.07713599999745686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,fp8,0,0.08912000060081482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,fp8,0,0.17778132359186807
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,float16,0,0.042122667034467064
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.08381866415341695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,fp8,0,0.0738613357146581
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,float16,0,0.012655999511480331
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,float16,0,0.04981866478919983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,fp8,0,0.05192000170548757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,float16,0,0.07526933153470357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,fp8,0,0.08278400202592213
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,float16,0,0.17510932683944702
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,float16,0,0.0909440020720164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,float16,0,0.09074667096138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,float16,0,0.02665599932273229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,float16,0,0.030346666773160298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,fp8,0,0.04181866844495138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,fp8,0,0.03145066648721695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,float16,0,0.041637333730856575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,float16,0,0.09452266494433086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,float16,0,0.04165333261092504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,float16,0,0.025413334369659424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.028688001135985058
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.08340266346931458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.04926399886608124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,float16,0,0.01923199991385142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,fp8,0,0.027615999182065327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,fp8,0,0.019226666539907455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,fp8,0,0.027637332677841187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,float16,0,0.025301332275072735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,fp8,0,0.04780800143877665
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.029322666426499683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.049914668003718056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,float16,0,0.01710933322707812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,float16,0,0.015173333386580149
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,float16,0,0.017557332913080852
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,float16,0,0.01717866708834966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,fp8,0,0.019253333409627277
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,float16,0,0.01725333308180173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.019071999937295914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,float16,0,0.013104000439246496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.01924266666173935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,float16,0,0.011989332735538483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,float16,0,0.013290667285521826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,float16,0,0.013104000439246496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,float16,0,0.011328000575304031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,float16,0,0.01138666644692421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,fp8,0,0.012063999970753988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,float16,0,0.011274666835864386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,fp8,0,0.012773333738247553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,float16,0,0.0116799995303154
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,fp8,0,0.0129120002190272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,float16,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,float16,0,0.025424001117547352
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,float16,0,0.010981333752473196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,float16,0,0.011472000430027643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,float16,0,0.010938666760921478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,fp8,0,0.011893333246310553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,float16,0,0.011349332829316458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,fp8,0,0.012661332885424295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,float16,0,0.011002667248249054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,float16,0,0.012421333541472753
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.011402666568756104
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,float16,0,0.011312000453472137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,float16,0,0.011045332998037338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,float16,0,0.01098666712641716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,float16,0,0.011231999844312668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,fp8,0,0.011242666592200598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,float16,0,0.011461333682139715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.011600000162919363
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,float16,0,0.06331199904282887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,fp8,0,0.06136000156402588
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,fp8,0,0.011050666371981302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,float16,0,0.013210666676362356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,fp8,0,0.07052800059318542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,float16,0,0.051957334081331887
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,fp8,0,0.10247466961542766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,float16,0,0.041850666205088295
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,float16,0,0.037231999138991036
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.05717333157857259
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,fp8,0,0.035530666510264076
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,fp8,0,0.04020266731580099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,float16,0,0.051327998439470925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,fp8,0,0.05630399783452352
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,float16,0,0.05194666484991709
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.05704000095526377
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,float16,0,0.031530665854612984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.03377600014209747
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,fp8,0,0.023354666928450268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,float16,0,0.02535466601451238
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,fp8,0,0.01748266691962878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,float16,0,0.0305173322558403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,fp8,0,0.033413333197434746
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,float16,0,0.03988266736268997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.03366400053103765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,float16,0,0.03145066648721695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.02146666745344798
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,float16,0,0.01653333380818367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,float16,0,0.01714133347074191
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,fp8,0,0.017322666943073273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,float16,0,0.09210667014122009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,fp8,0,0.02550933261712392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,float16,0,0.01985599969824155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.02107733239730199
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.015237333873907724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,float16,0,0.012335999558369318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,float16,0,0.019152000546455383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,float16,0,0.012741333494583765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,float16,0,0.013274667163689932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,float16,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,float16,0,0.010960000256697336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,float16,0,0.010933333386977514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,float16,0,0.015157333264748255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,float16,0,0.012063999970753988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,fp8,0,0.01321600005030632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,fp8,0,0.011493333925803503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,float16,0,0.011898666620254517
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,fp8,0,0.012154666086037954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,float16,0,0.011450666934251785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,float16,0,0.011152000476916632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,float16,0,0.02332799881696701
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,fp8,0,0.01268799975514412
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,float16,0,0.010981333752473196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.01163200040658315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,float16,0,0.011168000598748526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,float16,0,0.011168000598748526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,fp8,0,0.01116266722480456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,float16,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,float16,0,0.010885333021481832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,float16,0,0.011183999478816986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,float16,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.011120000233252844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,float16,0,0.011098666737476984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,float16,0,0.010965333630641302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,float16,0,0.011285333583752314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,float16,0,0.011247999966144562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,float16,0,0.07018666466077168
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,float16,0,0.01098666712641716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,fp8,0,0.011402666568756104
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,float16,0,0.05846933523813883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,fp8,0,0.01119999960064888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,float16,0,0.019813333948453266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,float16,0,0.06229333579540253
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,float16,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,fp8,0,0.060271998246510826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,float16,0,0.07690666615962982
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,float16,0,0.04181333382924398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.04279466470082601
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,float16,0,0.035536001125971474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,fp8,0,0.07521066566308339
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,fp8,0,0.0330826664964358
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,fp8,0,0.021157334248224895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,float16,0,0.041936000188191734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,float16,0,0.04182399809360504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.04376000165939331
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.02741866558790207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,float16,0,0.02293333411216736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,fp8,0,0.02117866774400075
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,float16,0,0.023391999304294586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,fp8,0,0.05569600065549215
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,float16,0,0.03601066768169403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,fp8,0,0.027466667195161183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.02738133321205775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,fp8,0,0.035232000052928925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,float16,0,0.01738133281469345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.017263999829689663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,float16,0,0.02532800038655599
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,fp8,0,0.015253332753976187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,float16,0,0.01515199989080429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,fp8,0,0.017045332739750545
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,fp8,0,0.02329600105683009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.01743999992807706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,float16,0,0.02569599946339925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,float16,0,0.012986666212479273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,float16,0,0.01312000056107839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,float16,0,0.025381334125995636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,fp8,0,0.04197333256403605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,float16,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,float16,0,0.01735466718673706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,float16,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,float16,0,0.017152000218629837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,float16,0,0.01108266661564509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,float16,0,0.01126933346192042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,float16,0,0.012367999802033106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,fp8,0,0.011407999942700068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,float16,0,0.011328000575304031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,float16,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.011312000453472137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,float16,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,float16,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.011567999919255575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,float16,0,0.013104000439246496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.011434666812419891
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,float16,0,0.010858666151762009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,float16,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,float16,0,0.010879999647537867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,float16,0,0.01121066634853681
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,float16,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,float16,0,0.011050666371981302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,float16,0,0.010944000134865442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,float16,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,0,0.011055999745925268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,float16,0,0.01137599969903628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,float16,0,0.011002667248249054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.011402666568756104
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,0,0.019589333484570186
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,0,0.019359999646743137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.01725333308180173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,0,0.01119999960064888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,0,0.01404800017674764
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,0,0.011157333850860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,0,0.015040000279744467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,0,0.01907733331123988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,0,0.015119999647140503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,0,0.009679999823371569
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,0,0.010005333150426546
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,float16,0,0.010960000256697336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,0,0.014864000181357065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.013280000537633896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,0,0.010938666760921478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,0,0.009253333633144697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,float16,0,0.011034666250149408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,0,0.00891733355820179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,0,0.009930666536092758
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,0,0.010778666784365972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,0,0.009296000003814697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,0,0.01126933346192042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.011050666371981302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,0,0.008943999807039896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,0,0.011999999483426413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,0,0.008869333192706108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,0,0.009216000015536943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.009216000015536943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,0,0.00933333362142245
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,0,0.008965333302815756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,0,0.009258666386206945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,0,0.008912000184257826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,0,0.00916800027092298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,0,0.009450666606426239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,0,0.009066666786869368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,0,0.009114666531483332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,0,0.009216000015536943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,0,0.00903466654320558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,0,0.00926399976015091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.009679999823371569
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,0,0.009306666751702627
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,0,0.009237333511312803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.009103999783595404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,0,0.009519999846816063
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,0,0.009189333145817121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.009119999905427298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,0,0.00916800027092298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,0,0.009077333534757296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,0,0.009253333633144697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,0,0.009125333279371262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,0,0.009178666397929192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,0,0.008943999807039896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,0,0.008997333546479544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,0,0.008874666566650072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,0,0.009008000294367472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,0,0.009194666519761086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,0,0.009189333145817121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,float16,0,1.9250346819559734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,float16,0,2.0317653020222983
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,0,0.008938666433095932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,fp8,0,1.763269265492757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,float16,0,1.4285972913106282
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,0,0.009136000027259191
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,float16,0,1.0142347017923992
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,fp8,0,0.8623413244883219
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,float16,0,1.0847466786702473
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,1.2042986551920574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,0.7001866499582926
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,fp8,0,1.5946027437845867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,float16,0,0.562117338180542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,float16,0,0.5993386507034302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,fp8,0,0.5352426767349243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,float16,0,0.7512160142262777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,0.6697866916656494
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,1.2329440116882324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,float16,0,0.44575464725494385
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,fp8,0,0.94596266746521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,fp8,0,0.3131573398907979
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,float16,0,0.3554133176803589
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,fp8,0,0.3311413327852885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,float16,0,0.44357868035634357
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,0.38150934378306073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,float16,0,1.1318453152974446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,fp8,0,0.9659732977549235
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,0,0.00895999992887179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,float16,0,1.2311680316925049
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,float16,0,0.34300267696380615
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,fp8,0,1.1006506284077961
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,float16,0,0.7571732997894287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,float16,0,0.9099520047505697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,0.8322933514912924
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,fp8,0,0.5351200103759766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,float16,0,0.6615306536356608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,float16,0,0.8865439891815186
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,0.823205312093099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,0.4474826653798421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,0.39739731947580975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,float16,0,0.3429173231124878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,fp8,0,0.31278934081395465
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,float16,0,0.37197331587473553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,float16,0,0.6080160140991211
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,fp8,0,0.4901119867960612
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,float16,0,0.48875733216603595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,float16,0,0.2707413236300151
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,0.265882670879364
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,float16,0,0.22395733992258707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,fp8,0,0.20368534326553345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,float16,0,0.49458666642506915
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,float16,0,0.2722346584002177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,0.265557328859965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,float16,0,1.4070879618326824
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,float16,0,0.8178026676177979
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,fp8,0,0.3319786588350932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,fp8,0,0.7125173409779867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,float16,0,0.9057066440582275
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,fp8,0,0.8171892960866293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,0.6235626538594564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,float16,0,0.44673065344492596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,fp8,0,0.3965386549631755
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,fp8,0,0.45176533857981366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,float16,0,0.23174399137496948
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,float16,0,0.6798559824625651
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,0.6160266796747843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,0.3569759925206502
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,float16,0,0.2653866608937581
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,fp8,0,0.24649600187937418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,fp8,0,0.6005973418553671
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,float16,0,0.6882666746775309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,float16,0,0.39082133769989014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,0.3486666679382324
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,float16,0,0.5006293455759684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,float16,0,0.19964265823364258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,0.20196266969045004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,fp8,0,0.15127999583880106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,float16,0,0.17151999473571777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,float16,0,0.3917226791381836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,fp8,0,0.1625333329041799
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,float16,0,0.20017067591349283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,0.20278932650883993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,float16,0,1.0570879777272542
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,fp8,0,0.9128373463948568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,float16,0,1.21396803855896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,fp8,0,1.082703987757365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,0.8710347016652426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,float16,0,0.1653279960155487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,float16,0,0.5626453161239624
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,float16,0,0.6357813278834025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,fp8,0,0.5835413138071696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,float16,0,0.9736426671346029
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,float16,0,0.2816373308499654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,0.8703893025716146
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,0.46140801906585693
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,float16,0,0.509552001953125
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,float16,0,0.31117333968480426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,fp8,0,0.27724266052246094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,float16,0,0.34827200571695965
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,fp8,0,0.21569067239761353
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,fp8,0,0.32214399178822833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,float16,0,0.4949013392130534
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,float16,0,0.2842293381690979
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,float16,0,0.1942346692085266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,fp8,0,0.17947200934092203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,float16,0,0.2062186598777771
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,fp8,0,0.19695999224980673
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,0.4623626470565796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,0.25863999128341675
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,float16,0,0.15412799517313638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,0.15633599956830344
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,float16,0,0.9750026861826578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,float16,0,0.12462932864824931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,fp8,0,0.11560533444086711
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,fp8,0,0.12458667159080505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,float16,0,0.15286399920781454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,0.4662346839904785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,0.15624533096949259
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,0.2690826654434204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,float16,0,0.645029346148173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,float16,0,0.7411786715189616
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,fp8,0,0.6978773276011149
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,fp8,0,0.2656000057856242
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,0.5945599873860677
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,float16,0,0.3481706778208415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,fp8,0,0.31911466519037884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,fp8,0,0.38179731369018555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,float16,0,0.13088533282279968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,fp8,0,0.49830933411916095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,0.6028159856796265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,0.3278346657752991
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,float16,0,0.19436800479888916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,float16,0,0.6488159894943237
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,fp8,0,0.18218666315078735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,fp8,0,0.2015786568323771
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,float16,0,0.4010453224182129
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,float16,0,0.6285760005315145
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,0.32422399520874023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,float16,0,0.17578667402267456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,0.18097599347432455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,float16,0,0.3448479970296224
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,float16,0,0.13874666889508566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,fp8,0,0.13285866379737854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,float16,0,0.18105065822601318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,0.1812373399734497
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,float16,0,0.11730666955312093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.12035199999809265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,float16,0,0.09497599800427754
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,fp8,0,0.08876267075538635
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,float16,0,0.10008533795674641
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,float16,0,0.1314826707045237
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,float16,0,0.11778666575749715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,fp8,0,0.5716533263524374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,float16,0,0.2847359975179036
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,float16,0,0.6384640137354533
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,fp8,0,0.5696906646092733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,float16,0,0.22350400686264038
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,float16,0,0.7598666350046793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,0.6829706827799479
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,fp8,0,0.7380373477935791
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,float16,0,0.33898667494455975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,fp8,0,0.30934399366378784
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,float16,0,0.4107999801635742
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,fp8,0,0.11980266372362773
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,0.6785439650217692
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,float16,0,0.378762682278951
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,0.37014933427174884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,float16,0,0.1877653400103251
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,float16,0,0.7476267019907633
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,float16,0,0.34015464782714844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,fp8,0,0.21582400798797607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,fp8,0,0.09694400429725647
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,fp8,0,0.3965493440628052
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,float16,0,0.37120532989501953
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,float16,0,0.21110934019088745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,float16,0,0.7334773540496826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,float16,0,0.11596799890200298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,fp8,0,0.1099679966767629
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,float16,0,0.12852266430854797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,float16,0,0.20374399423599243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.1825173298517863
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,fp8,0,0.17001599073410034
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,float16,0,0.10168533523877461
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.11072533329327901
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,float16,0,0.0727946658929189
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,float16,0,0.0791733314593633
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,fp8,0,0.07853333155314128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,float16,0,0.10266666611035664
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,0.3490080038706462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,float16,0,0.08115733166535695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.20037333170572916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.08274133503437042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,float16,0,0.06649599969387054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,float16,0,0.06963199873765309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,fp8,0,0.06828799843788147
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,fp8,0,0.12575466434160867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,float16,0,0.08058133224646251
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.08273066580295563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,float16,0,0.22985599438349405
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,float16,0,0.39865068594614667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,fp8,0,0.06832533578077953
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,float16,0,0.4973920186360677
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,fp8,0,0.49614401658376056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,0.48156265417734784
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.10949866970380147
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.11981333295504253
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,float16,0,0.26902933915456134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,fp8,0,0.062421331803003945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,float16,0,0.49715733528137207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,0.48657600084940594
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,float16,0,0.26393600304921466
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,float16,0,0.11952533324559529
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,fp8,0,0.11662933230400085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,fp8,0,0.377125342686971
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,fp8,0,0.14145066340764365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,float16,0,0.2691093285878499
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,float16,0,0.49836798508961994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,0.26285332441329956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,fp8,0,0.20476800203323364
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,float16,0,0.13926399747530618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,fp8,0,0.2675360043843587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,fp8,0,0.07683200140794118
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,float16,0,0.0888266662756602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,fp8,0,0.08956799904505412
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.13795733451843262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,float16,0,0.07893333335717519
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,0.26182399193445843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,float16,0,0.05669333537419637
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,float16,0,0.15010666847229004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,fp8,0,0.054010664423306785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,float16,0,0.06208533545335134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,fp8,0,0.06221333146095276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,float16,0,0.21955200036366782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,float16,0,0.06319466729958852
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.13874133427937826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.06423466900984447
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,fp8,0,0.05004799862702688
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,float16,0,0.12611200412114462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,float16,0,0.055888002117474876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,fp8,0,0.053904001911481224
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,float16,0,0.0633546660343806
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,float16,0,0.0788373351097107
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,float16,0,0.07888533174991608
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,float16,0,0.05239999790986379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,float16,0,0.5570079882939657
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,fp8,0,0.5594133138656616
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.08470933636029561
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,0.6866986751556396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,float16,0,0.22592000166575113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,fp8,0,0.21481066942214966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,float16,0,0.29043734073638916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,float16,0,0.4118880033493042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,float16,0,0.5750133196512858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,float16,0,0.3121333320935567
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,fp8,0,0.3961600065231323
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,float16,0,0.12366400162378947
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,float16,0,0.6335093180338541
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.06443200012048085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,fp8,0,0.15983999768892923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,float16,0,0.31436266501744586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,fp8,0,0.2953866720199585
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,float16,0,0.16076266765594482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.16325866182645163
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,float16,0,0.07446933289368947
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,fp8,0,0.0726560006539027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,float16,0,0.08699199557304382
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,fp8,0,0.11331199606259663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,float16,0,0.15852266550064087
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.15914133191108704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,float16,0,0.07668800155321757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,float16,0,0.047007997830708824
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.0848426620165507
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,fp8,0,0.04603200157483419
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,float16,0,0.053674668073654175
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,0.6898187001546224
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,float16,0,0.07660266757011414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.08661333719889323
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,float16,0,0.055999999245007835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.06011199951171875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,float16,0,0.04098666707674662
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,fp8,0,0.03953066716591517
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.08829333384831746
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,fp8,0,0.044079999128977455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,float16,0,0.05606399973233541
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.05994133154551188
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,float16,0,0.045754666129748024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,fp8,0,0.055770665407180786
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,float16,0,0.03914133210976919
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,fp8,0,0.0377866675456365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,float16,0,0.03986666599909464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,fp8,0,0.03962666789690653
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,fp8,0,0.08919466535250346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,float16,0,0.04587733248869578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,float16,0,0.1604693333307902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,float16,0,0.04408533374468485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,fp8,0,0.2692213257153829
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,float16,0,0.3736000061035156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,0.30288533369700116
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,float16,0,0.4432426691055298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.04604800045490265
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,float16,0,0.1507146656513214
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,fp8,0,0.1474346617857615
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,fp8,0,0.2087093393007914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,float16,0,0.4445706605911255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,0.41198933124542236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,float16,0,0.2752373417218526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,float16,0,0.22425599892934164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.2251573403676351
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,fp8,0,0.39306668440500897
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,float16,0,0.10943999886512756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,0.4190026521682739
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,0.29525866111119586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,float16,0,0.2277066707611084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,float16,0,0.11261333028475444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,float16,0,0.2057653268178304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,float16,0,0.05400000015894572
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.04593066871166229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,float16,0,0.06434133152167003
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,fp8,0,0.06653866668542226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,float16,0,0.10142933328946431
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,float16,0,0.0825386643409729
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.1172213355700175
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,float16,0,0.0602453351020813
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,float16,0,0.03774933268626531
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,fp8,0,0.03761066744724909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,fp8,0,0.103301336367925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,fp8,0,0.04399999976158142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,float16,0,0.05974400043487549
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.11587199568748474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.06837333242098491
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,float16,0,0.04381333291530609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.047983999053637184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,float16,0,0.03364266703526179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,float16,0,0.03571200122435888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,fp8,0,0.08163199822107951
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,fp8,0,0.036288000643253326
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.06830933193365733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.04795733094215393
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,float16,0,0.042597333590189614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.03771200031042099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,float16,0,0.031871999303499855
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,fp8,0,0.03145600110292435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,fp8,0,0.05329066514968872
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,float16,0,0.0337119996547699
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,fp8,0,0.033546666304270424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.03766400118668874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,float16,0,0.04364266494909922
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,float16,0,0.037605332831541695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,fp8,0,0.31009600559870404
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.22843732436498007
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,float16,0,0.4381066560745239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,float16,0,0.5286133289337158
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,fp8,0,0.46779199441274005
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,float16,0,0.03751466671625773
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,fp8,0,0.1649279991785685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,fp8,0,0.25121599435806274
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,float16,0,0.5596053202946981
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,0.4880959987640381
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.26254934072494507
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,fp8,0,0.03333866596221924
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,float16,0,0.13008532921473184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,float16,0,0.23972799380620322
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,fp8,0,0.1297546625137329
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,float16,0,0.28198399146397907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,float16,0,0.31243733565012616
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,float16,0,0.28467732667922974
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.25974400838216144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,float16,0,0.1502240002155304
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,float16,0,0.053957333167394005
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,float16,0,0.06676800052324931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,fp8,0,0.0718453327814738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,fp8,0,0.08367466926574707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.12249066432317098
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,float16,0,0.062421331803003945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,float16,0,0.03366933266321818
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,fp8,0,0.033514666060606636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,float16,0,0.03962666789690653
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,fp8,0,0.043738668163617454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,float16,0,0.09056533376375835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,fp8,0,0.05422399938106537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,float16,0,0.0620959997177124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,float16,0,0.04394133388996124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.04808533191680908
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,float16,0,0.028069332242012024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,float16,0,0.03164800008138021
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.07443200051784515
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,fp8,0,0.03355200091997782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,float16,0,0.04201599955558777
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.049029335379600525
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.03573866685231527
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.12876799702644348
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,fp8,0,0.025600001215934753
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.07463466624418895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,float16,0,0.027295999228954315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,float16,0,0.033514666060606636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.035402665535608925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,float16,0,0.027600000301996868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,float16,0,0.03339199970165888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.029274667302767437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,float16,0,0.025360000630219776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,fp8,0,0.023157333334287006
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,float16,0,0.02532800038655599
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,float16,0,0.027456000447273254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.02957333376010259
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,float16,0,0.025221332907676697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,float16,0,0.2604586680730184
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,float16,0,0.16550399859746298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,fp8,0,0.027327999472618103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,fp8,0,0.2614293297131856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,float16,0,0.39210132757822674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,fp8,0,0.4264426628748576
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,float16,0,0.5332906643549601
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.49066134293874103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,float16,0,0.14078399538993835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,float16,0,0.20335467656453451
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,fp8,0,0.025231999655564625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,fp8,0,0.22454400857289633
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,fp8,0,0.027376001079877216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,float16,0,0.5324639876683553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,float16,0,0.2652906576792399
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.49214935302734375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,float16,0,0.07419200241565704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,fp8,0,0.07051733136177063
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,float16,0,0.115365336338679
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,float16,0,0.265994668006897
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.24495999018351236
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.13732266426086426
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,float16,0,0.04348266621430715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,fp8,0,0.13991999626159668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,fp8,0,0.04393066465854645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,float16,0,0.05600533386071523
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.2563680013020833
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,float16,0,0.1376053293546041
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.11487467090288798
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,float16,0,0.056074668963750206
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,fp8,0,0.11559999982515971
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,float16,0,0.13825600345929465
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,float16,0,0.02626666675011317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,float16,0,0.033557333052158356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,fp8,0,0.03763733307520548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,float16,0,0.0562720000743866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,float16,0,0.036992001036802925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,float16,0,0.021397332350413006
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,fp8,0,0.06234133243560791
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,fp8,0,0.023226665953795116
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,float16,0,0.025445332129796345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,fp8,0,0.02740799884001414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,float16,0,0.03572800010442734
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.06964266796906789
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.04380266865094503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,fp8,0,0.027322667340437572
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.02935466667016347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,float16,0,0.0194560003777345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,fp8,0,0.019061333189407986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,float16,0,0.026698666314284008
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.043749332427978516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.02939733366171519
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.023215999205907185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,float16,0,0.017514667163292568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,fp8,0,0.018181333939234417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,fp8,0,0.019541333119074505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,float16,0,0.02149333308140437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,float16,0,0.021359999974568684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.023344000180562336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,float16,0,0.01933866615096728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,float16,0,0.021151999632517498
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,float16,0,0.017082666357358296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,fp8,0,0.017194667210181553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,float16,0,0.019258666783571243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,float16,0,0.025434667865435284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.06837333242098491
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.019215999792019527
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,fp8,0,0.12356799840927124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,float16,0,0.1975733240445455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,float16,0,0.2576746741930644
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,fp8,0,0.21158399184544882
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,float16,0,0.01918399954835574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.2536533276240031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,float16,0,0.06489066779613495
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,fp8,0,0.06452799836794536
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,float16,0,0.10639466842015584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,fp8,0,0.10097066561381023
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,float16,0,0.2585493326187134
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.25017066796620685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.11408533652623494
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,float16,0,0.01781333362062772
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,float16,0,0.048911998669306435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,fp8,0,0.056048000852266945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,float16,0,0.12943999965985617
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.11633066336313884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,float16,0,0.05216533442338308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,float16,0,0.12317333618799846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,float16,0,0.12468799948692322
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,float16,0,0.023237332701683044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,fp8,0,0.02550400048494339
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,float16,0,0.03661333272854487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,float16,0,0.0524479995171229
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,float16,0,0.03356266766786575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,float16,0,0.019285333653291065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,fp8,0,0.021327999730904896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,float16,0,0.021418665846188862
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.06637866795063019
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,fp8,0,0.025424001117547352
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,float16,0,0.03344533344109853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.039813332259655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,fp8,0,0.03523733218510946
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.02739199995994568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,float16,0,0.017024000485738117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.06632000207901001
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,float16,0,0.017184000462293625
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,fp8,0,0.019205333044131596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,float16,0,0.0232640008131663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.027488000690937042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,float16,0,0.019098666807015736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,float16,0,0.015013333410024643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,fp8,0,0.015103999525308609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,float16,0,0.023232000569502514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,fp8,0,0.03950933367013931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,float16,0,0.01699200024207433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.0397173340121905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,float16,0,0.01605333387851715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.017082666357358296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,float16,0,0.014991999914248785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,fp8,0,0.01912533367673556
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,float16,0,0.015072000523408255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,fp8,0,0.014970666418472925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,float16,0,0.016074666132529575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,float16,0,0.015040000279744467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,float16,0,0.01523200049996376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.021146667500336964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,fp8,0,0.014970666418472925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,float16,0,0.015013333410024643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.02123733361562093
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,float16,0,0.0689386675755183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,fp8,0,0.06635733445485432
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,float16,0,0.02945599953333537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,float16,0,0.11005866527557373
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,float16,0,0.13620266318321228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,float16,0,0.015114666273196539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,float16,0,0.03955733279387156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.11894399921099345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,fp8,0,0.041722665230433144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,float16,0,0.052000001072883606
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,float16,0,0.1358560025691986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.06698133548100789
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,float16,0,0.0236160010099411
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,float16,0,0.01903466631968816
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,float16,0,0.03143999973932902
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,fp8,0,0.0354666660229365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,fp8,0,0.059157331784566246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,float16,0,0.05390933156013489
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,float16,0,0.054010664423306785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.06753066678841908
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,float16,0,0.03151999910672506
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.03766400118668874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,float16,0,0.01717866708834966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,fp8,0,0.026693334182103474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,fp8,0,0.023258666197458904
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.03789866715669632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,float16,0,0.021295999487241108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,fp8,0,0.017344000438849132
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.02441066751877467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,fp8,0,0.11085333426793416
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,float16,0,0.015008000036080679
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,fp8,0,0.014954666296641031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,float16,0,0.02120000123977661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.1200320025285085
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.01823466643691063
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,float16,0,0.019194666296243668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,float16,0,0.01321600005030632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,float16,0,0.017130666722853977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,float16,0,0.012981332838535309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.015333333363135656
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,float16,0,0.01322666679819425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,fp8,0,0.01240533341964086
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,float16,0,0.013061333447694778
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.024703999360402424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,float16,0,0.011887999872366587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,float16,0,0.011616000284751257
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,fp8,0,0.017423999806245167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,float16,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,float16,0,0.03145600110292435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,float16,0,0.012634667257467905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,float16,0,0.011178666104873022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,float16,0,0.01138666644692421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,fp8,0,0.012266666938861212
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,float16,0,0.011050666371981302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,float16,0,0.016309333344300587
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,float16,0,0.011183999478816986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,float16,0,0.04797866443792979
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,fp8,0,0.050111999114354454
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,float16,0,0.060165335734685264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,float16,0,0.059903999169667564
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.07237333556016286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,float16,0,0.029653333127498627
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,fp8,0,0.03141866624355316
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,float16,0,0.037151999771595
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,float16,0,0.0601440022389094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.07247466842333476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.041637333730856575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,fp8,0,0.020501332978407543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,float16,0,0.023200000325838726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,fp8,0,0.025274666647116344
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,float16,0,0.013013333082199097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,float16,0,0.034128000338872276
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,float16,0,0.012842666357755661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,float16,0,0.02143466720978419
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.04179200033346812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,float16,0,0.015040000279744467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,float16,0,0.019071999937295914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,fp8,0,0.017210666090250015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,float16,0,0.021130666136741638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,fp8,0,0.0668639987707138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,float16,0,0.0170666662355264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,float16,0,0.01310933381319046
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,fp8,0,0.04171200096607208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,float16,0,0.013104000439246496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.023685333629449207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,float16,0,0.03372266640265783
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,float16,0,0.013232000172138214
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.013349333157142004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,fp8,0,0.012725333372751871
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,float16,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.023738667368888855
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.013365333278973898
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,float16,0,0.012949333836634954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.011648000528415045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,float16,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,fp8,0,0.013327999661366144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,float16,0,0.017103999853134155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,float16,0,0.011183999478816986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,float16,0,0.011306667079528173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,float16,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,float16,0,0.015279999623696009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,fp8,0,0.011178666104873022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,float16,0,0.01118933285276095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,float16,0,0.011146667102972666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.011546666423479715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,float16,0,0.010938666760921478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,float16,0,0.011274666835864386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,fp8,0,0.011616000284751257
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,float16,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,float16,0,0.011317333827416102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.011733333269755045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,float16,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,float16,0,0.011264000087976456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,fp8,0,0.0397119993964831
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,float16,0,0.01098666712641716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,float16,0,0.04674133161703745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,fp8,0,0.011605333536863327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,float16,0,0.03973866750796636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,fp8,0,0.049925332268079124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,float16,0,0.02532800038655599
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,float16,0,0.029461334149042766
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,fp8,0,0.031397332747777305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,float16,0,0.03956799954175949
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,float16,0,0.02332266668478648
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.027402666707833607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.01129066695769628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,float16,0,0.01716800034046173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,float16,0,0.019146667172511418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,fp8,0,0.01940800001223882
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,float16,0,0.024645333488782246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.04612799982229868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.01815466706951459
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,float16,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,fp8,0,0.026714667677879333
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,float16,0,0.013349333157142004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,float16,0,0.01709866647919019
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,float16,0,0.039781334499518074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.017818666994571686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,fp8,0,0.017263999829689663
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,float16,0,0.011045332998037338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.013733333597580591
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.02740799884001414
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,float16,0,0.01658133293191592
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,float16,0,0.013007999708255133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,float16,0,0.011157333850860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,fp8,0,0.011605333536863327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.04578666885693868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,float16,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,float16,0,0.01322666679819425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,fp8,0,0.01184533288081487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,float16,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,float16,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,float16,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.011301333705584208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.01339200014869372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,float16,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,float16,0,0.01119999960064888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,float16,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,float16,0,0.01101333275437355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,float16,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,fp8,0,0.01137599969903628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,float16,0,0.010869332899649939
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,float16,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,float16,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.011434666812419891
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.011152000476916632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,float16,0,0.011312000453472137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,float16,0,0.011136000355084738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,float16,0,0.035829332967599235
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,float16,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,fp8,0,0.03414933383464813
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.011695999652147293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,fp8,0,0.039642666776975
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,float16,0,0.0305173322558403
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.03325333446264267
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,float16,0,0.023365333676338196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,fp8,0,0.023183998962243397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,fp8,0,0.025466665625572205
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,float16,0,0.025253333151340485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,float16,0,0.029845332105954487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.031717332700888314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.021349333226680756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,fp8,0,0.017418666432301205
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,float16,0,0.019141333798567455
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,float16,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.021221332252025604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,float16,0,0.013183999806642532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,float16,0,0.012527999778588613
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,float16,0,0.012853333105643591
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,fp8,0,0.011205332974592844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,fp8,0,0.013237333546082178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,float16,0,0.013151999562978745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,float16,0,0.017055999487638474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.011434666812419891
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,float16,0,0.011146667102972666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,fp8,0,0.011413333316644033
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,fp8,0,0.012319999436537424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,fp8,0,0.012719999998807907
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,float16,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,float16,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,float16,0,0.01126933346192042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,fp8,0,0.011178666104873022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,float16,0,0.01617066686352094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,float16,0,0.011296000331640244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,float16,0,0.039936001102129616
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,float16,0,0.010911999891201654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,float16,0,0.010890666395425797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,float16,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,float16,0,0.011205332974592844
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.01129066695769628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,float16,0,0.01101333275437355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,float16,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.01139733319481214
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,float16,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,float16,0,0.010981333752473196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,float16,0,0.012058666596810022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,float16,0,0.010911999891201654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,float16,0,0.01922133316596349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.011296000331640244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,float16,0,0.010928000013033548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,fp8,0,0.010890666395425797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,float16,0,0.010890666395425797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,float16,0,0.011242666592200598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.011541333049535751
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,0,0.011002667248249054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,0,0.015002666662136713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,float16,0,0.011194666226704916
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,0,0.01101333275437355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,0,0.011354666203260422
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,0,0.014890667051076889
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,0,0.01098666712641716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,0,0.015077333897352219
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,0,0.009541333342591921
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,0,0.010666667173306147
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,0,0.009370666618148485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,0,0.009578666960199675
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,0,0.008896000062425932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,0,0.009312000125646591
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.009279999881982803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,0,0.00891733355820179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.01368533323208491
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,0,0.009061333412925402
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,0,0.00972800018886725
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,0,0.00898133342464765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,0,0.00914666677514712
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,0,0.011237333218256632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,0,0.009173333023985228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,0,0.00895999992887179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.00922133338948091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,0,0.008858666444818178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,0,0.009114666531483332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,0,0.009119999905427298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,0,0.009205333267649015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,0,0.009173333023985228
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.009248000259200731
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,0,0.009130666653315226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.00922133338948091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,0,0.009125333279371262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,0,0.008885333314538002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,0,0.009205333267649015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,0,0.008858666444818178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.009162666896979014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,0,0.008949333180983862
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,0,0.009296000003814697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,0,0.008965333302815756
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.009119999905427298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,0,0.009141333401203156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,0,0.009002666920423508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,0,0.009114666531483332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,0,0.008938666433095932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,float16,0,1.0626133282979329
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,float16,0,0.6584746837615967
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,0.6200746695200602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,float16,0,0.5897333224614462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,fp8,0,0.5260693232218424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,float16,0,0.659226655960083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,0.6133439938227335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,0.35624531904856366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,float16,0,0.35075199604034424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,fp8,0,0.32515732447306317
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,float16,0,0.39410666624705
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,0.3545920054117839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,float16,0,0.2409813404083252
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,fp8,0,0.9281653563181559
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,fp8,0,0.21040532986323038
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,float16,0,0.2273013393084208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.009242666885256767
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,0.2283626596132914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,float16,0,0.6525760094324747
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,fp8,0,0.5896000067392985
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,float16,0,0.4252693255742391
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,float16,0,0.3731893301010132
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,fp8,0,0.3349706729253133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,float16,0,0.4271999994913737
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,0,0.008821333448092142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,float16,0,0.24984532594680786
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,0.2283253272374471
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,0.23966399828592935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,float16,0,0.24022400379180908
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,fp8,0,0.2162239948908488
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,float16,0,0.25332800547281903
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,0.23703465859095255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,0.17627199490865073
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,float16,0,0.17293866475423178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,float16,0,0.3978293339411418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,float16,0,0.18361065785090128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,0.1771786610285441
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,float16,0,0.48635733127593994
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,fp8,0,0.44439466794331867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,float16,0,0.3302346666653951
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,float16,0,0.23629866043726602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,0.31466132402420044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,float16,0,0.27530133724212646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,fp8,0,0.26122132937113446
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,float16,0,0.33585067590077716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,0.31037867069244385
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,float16,0,0.1872373421986898
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,fp8,0,0.16182399789492288
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,0.18297600746154785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,float16,0,0.17006399234135947
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,fp8,0,0.16078399618466696
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,float16,0,0.18971733252207437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,0.18195199966430664
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,float16,0,0.15507733821868896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,float16,0,0.14640000462532043
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,fp8,0,0.13756799697875977
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,float16,0,0.15556266903877258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.14839999874432883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,float16,0,0.18469866116841635
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,fp8,0,0.568336009979248
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,float16,0,0.42202667395273846
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,0.40622933705647785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.14847466349601746
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,fp8,0,0.31717334191004437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,float16,0,0.25013866027196247
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,0.2225546638170878
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,float16,0,0.20696532726287842
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,fp8,0,0.19422932465871176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,float16,0,0.615882674853007
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,float16,0,0.24949334065119425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,float16,0,0.14455999930699667
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.1402773360411326
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,0.40540266036987305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,float16,0,0.1296266714731852
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,float16,0,0.14483732978502908
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,float16,0,0.3452853361765544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,float16,0,0.12739200393358865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,float16,0,0.41763734817504883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,float16,0,0.11974400281906128
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,fp8,0,0.11293866237004598
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,float16,0,0.12802666425704956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.12248532970746358
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,0.2205173373222351
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,fp8,0,0.3715840180714925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,float16,0,0.2740373412768046
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,float16,0,0.22115200757980347
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.1402133305867513
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.12261333068211873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,float16,0,0.27801599105199176
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,float16,0,0.15154133240381876
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,0.4014293352762858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,0.15429866313934326
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,float16,0,0.3934239943822225
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,float16,0,0.15893866618474325
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,0.27483199040095013
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,0.1549066702524821
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,0.4041546583175659
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.10921600461006165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,fp8,0,0.09522666533788045
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,0.2755413254102071
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,float16,0,0.1114026705423991
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,float16,0,0.09763200084368388
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.09517332911491394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,float16,0,0.09286399682362874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,float16,0,0.11089066664377849
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,fp8,0,0.0869706670443217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,float16,0,0.09833600123723348
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.09490666786829631
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,float16,0,0.3948906660079956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,fp8,0,0.3760319948196411
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.10931733250617981
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,float16,0,0.2942240039507548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,0.29263466596603394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,fp8,0,0.2072533369064331
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,fp8,0,0.12298132975896199
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,0.298362672328949
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,fp8,0,0.20453866322835287
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.15094932913780212
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,float16,0,0.12666133046150208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,float16,0,0.17003732919692993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,float16,0,0.1360053320725759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.15119466185569763
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,float16,0,0.21804799636205038
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.09485866626103719
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,float16,0,0.07892266909281413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,fp8,0,0.07685866455237071
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,float16,0,0.09326933821042378
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.09453333417574565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,float16,0,0.09877866506576538
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,float16,0,0.07664533456166585
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,float16,0,0.06864533325036366
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,float16,0,0.07664533456166585
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.07659199833869934
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,fp8,0,0.13011200229326883
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,float16,0,0.06840533514817555
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.06670933465162913
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,float16,0,0.06471999982992808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,fp8,0,0.06206400195757548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,float16,0,0.0682826687892278
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,float16,0,0.16957332690556845
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.07689066727956136
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,float16,0,0.2614453236262004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,fp8,0,0.06639466683069865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,fp8,0,0.25878934065500897
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,float16,0,0.19937600692113241
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,0.2082293430964152
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,float16,0,0.14842133720715842
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,fp8,0,0.13432000080744425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,float16,0,0.20031466086705527
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,float16,0,0.2856266697247823
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.10725333293279012
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,float16,0,0.08912000060081482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,float16,0,0.11020267009735107
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,fp8,0,0.12432533502578735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,float16,0,0.07270933190981548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.07454399764537811
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,float16,0,0.0922933320204417
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,float16,0,0.06224533418814341
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,float16,0,0.07241599758466084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.06659199794133504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,float16,0,0.06007466713587443
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.0603413333495458
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.1123413344224294
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,fp8,0,0.053530668218930565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,float16,0,0.06003733476003011
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.06062399844328562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,float16,0,0.05394133428732554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.054474666714668274
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,0.20786132415135702
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,fp8,0,0.050197333097457886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,float16,0,0.05399466554323832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.054197331269582115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,float16,0,0.05482666691144308
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,float16,0,0.28775999943415326
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,fp8,0,0.28012800216674805
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,float16,0,0.2204586664835612
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,0.23609066009521484
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,fp8,0,0.1495519975821177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,float16,0,0.1116480032602946
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,float16,0,0.2222879926363627
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,0.23617599407831827
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,float16,0,0.12217066685358684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,fp8,0,0.08719999591509502
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,float16,0,0.08442133665084839
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,fp8,0,0.08673600355784099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,float16,0,0.12461333473523457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,float16,0,0.06656000018119812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.0717439999183019
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,float16,0,0.05264000097910563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,float16,0,0.15717333555221558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,fp8,0,0.0601440022389094
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,float16,0,0.051498666405677795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.11930132905642192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.0540533314148585
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,fp8,0,0.04423999786376953
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.11250666777292888
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,float16,0,0.05215999980767568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,float16,0,0.043866669138272606
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,float16,0,0.06656000018119812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,float16,0,0.05197866757710775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.07441600163777669
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,float16,0,0.04377600053946177
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.044112001856168113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,float16,0,0.03967999915281931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.03959999978542328
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,fp8,0,0.054144000013669334
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,float16,0,0.03761066744724909
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.04391466577847799
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,fp8,0,0.03770666569471359
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.07193600138028462
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,fp8,0,0.03921066721280416
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,float16,0,0.19801066319147745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,fp8,0,0.20148799816767374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,float16,0,0.15971733132998148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,float16,0,0.04374399781227112
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.17057599623998007
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,fp8,0,0.10339200496673584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.17044800519943237
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,float16,0,0.1599999964237213
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.05399466554323832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,float16,0,0.07859733204046886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.08782399694124858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,float16,0,0.039594667653242745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.08649067083994548
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,float16,0,0.05219733218352
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,float16,0,0.04161066561937332
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,fp8,0,0.04364266494909922
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,float16,0,0.052229334910710655
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,float16,0,0.10517332951227824
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.05735466877619425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,float16,0,0.04158399999141693
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.04385066529115041
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,float16,0,0.03565866748491923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,float16,0,0.06354666749636333
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,float16,0,0.03976000100374222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,float16,0,0.041834667325019836
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.05707733333110809
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.03612799942493439
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,fp8,0,0.03350933392842611
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,float16,0,0.03558400024970373
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.03575466573238373
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,float16,0,0.03365866591533025
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,fp8,0,0.03694933404525121
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.03951466580231985
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.043749332427978516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,float16,0,0.03563733398914337
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,fp8,0,0.031354665756225586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.0317493329445521
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,float16,0,0.2273120085398356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,float16,0,0.07889600098133087
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,fp8,0,0.23482666412989298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.03357866654793421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,float16,0,0.18893865744272867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,float16,0,0.03346666693687439
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.20723734299341837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,fp8,0,0.12140267093976338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,float16,0,0.1874826749165853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.2071733276049296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,float16,0,0.06288533409436543
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,fp8,0,0.06855999926726024
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,float16,0,0.10286933183670044
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.09540800253550212
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,float16,0,0.05300800005594889
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,float16,0,0.033520000676314034
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.05977599819501241
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,float16,0,0.12378666798273723
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,fp8,0,0.04177600145339966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.06016000111897787
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.09892800450325012
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,float16,0,0.03137599925200144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,fp8,0,0.03345600018898646
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,float16,0,0.03827200084924698
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,float16,0,0.031685332457224526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.04190400242805481
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.03346133232116699
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,float16,0,0.05366933345794678
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,float16,0,0.1067680021127065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,float16,0,0.037989333271980286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,float16,0,0.03143466760714849
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.03352533280849457
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.027434666951497395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,fp8,0,0.06406400104363759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,float16,0,0.025301332275072735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,fp8,0,0.02533866713444392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,float16,0,0.027295999228954315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,float16,0,0.039664000272750854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,float16,0,0.025253333151340485
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,float16,0,0.023743999501069386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,float16,0,0.027456000447273254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.04341333111127218
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,float16,0,0.025237334271272022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.025301332275072735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,float16,0,0.19418134291966757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,float16,0,0.03170666595300039
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,fp8,0,0.20998932917912802
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.02536533276240031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,float16,0,0.17272533973058066
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,float16,0,0.10850666960080464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,fp8,0,0.10576533277829488
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,float16,0,0.027424000203609467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,float16,0,0.09154133001963298
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.08915199836095174
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,float16,0,0.05268266797065735
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,fp8,0,0.059077332417170204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,float16,0,0.09221866726875305
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.09019200007120769
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,float16,0,0.04680533210436503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,fp8,0,0.02755733331044515
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.05338133374849955
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,float16,0,0.03363200028737386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,fp8,0,0.035589332381884255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,float16,0,0.04722133278846741
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.05395199855168661
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.03756800045569738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.02736533433198929
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,fp8,0,0.02735999971628189
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,float16,0,0.0315786674618721
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.037317333122094475
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,float16,0,0.025397333006064098
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,fp8,0,0.023306667804718018
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,fp8,0,0.02124800036350886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,float16,0,0.025349333882331848
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.19137599070866904
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.027376001079877216
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,float16,0,0.025306666890780132
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.021226666867733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,fp8,0,0.019173332800467808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,float16,0,0.021141332884629566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.027317332724730175
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.021173333128293354
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,float16,0,0.01918399954835574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,float16,0,0.17303466796875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,float16,0,0.01923199991385142
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,float16,0,0.019461333751678467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,fp8,0,0.017423999806245167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.019152000546455383
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.01782400036851565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,float16,0,0.0173333336909612
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,fp8,0,0.01738133281469345
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,float16,0,0.018842666099468868
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.19220799207687378
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.019248000035683315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,float16,0,0.03160000095764796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,float16,0,0.018405333161354065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,float16,0,0.09946133693059285
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,float16,0,0.019258666783571243
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,float16,0,0.08577600121498108
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.08221333225568135
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,float16,0,0.04638933142026266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,fp8,0,0.053642665346463524
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,float16,0,0.02128533273935318
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,float16,0,0.0429013321797053
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.05092266698678335
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.08242133259773254
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,float16,0,0.029472000896930695
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,float16,0,0.04354666670163473
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,fp8,0,0.0335413341720899
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.050623998045921326
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,float16,0,0.021189334491888683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.033488000432650246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,fp8,0,0.02384000023206075
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,float16,0,0.029296000798543293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.03356266766786575
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,float16,0,0.021269333859284718
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,float16,0,0.017535999417304993
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,float16,0,0.02143999934196472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.023631999890009563
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,float16,0,0.01932799940307935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.019248000035683315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,float16,0,0.029391999046007793
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,float16,0,0.021216000119845074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,fp8,0,0.0990133285522461
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,float16,0,0.015813333292802174
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.02533866713444392
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,float16,0,0.015205333630243937
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,fp8,0,0.01921066641807556
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,fp8,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,float16,0,0.08547733227411906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,float16,0,0.015344000111023584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,float16,0,0.016085332880417507
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.015253332753976187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,fp8,0,0.015114666273196539
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,float16,0,0.017242666333913803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,float16,0,0.015168000012636185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.015237333873907724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,fp8,0,0.01498666654030482
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,float16,0,0.017162666966517765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,float16,0,0.015194666882356008
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,float16,0,0.015146666516860327
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,float16,0,0.015189333508412043
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,float16,0,0.046240001916885376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.019194666296243668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,float16,0,0.04475200176239014
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.05197866757710775
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,fp8,0,0.056090667843818665
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,float16,0,0.043893332282702126
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.05186666548252106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.016970666746298473
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,float16,0,0.019173332800467808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.03158933420976003
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,float16,0,0.02739199995994568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.03169066707293192
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,float16,0,0.01903466631968816
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.021418665846188862
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,float16,0,0.01516266663869222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,fp8,0,0.017557332913080852
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,float16,0,0.01933866615096728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,float16,0,0.0273333340883255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.02203733225663503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,float16,0,0.013066666821638743
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,float16,0,0.015253332753976187
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,float16,0,0.013141332815090815
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,float16,0,0.012810666114091873
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,float16,0,0.01314666618903478
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,float16,0,0.012191999703645706
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,float16,0,0.015413332730531693
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,float16,0,0.013178666432698568
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,fp8,0,0.012181332955757776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,float16,0,0.013221333424250284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,float16,0,0.012981332838535309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.011541333049535751
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,float16,0,0.01301866645614306
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,fp8,0,0.011509332805871964
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,float16,0,0.03129599988460541
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.011450666934251785
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,float16,0,0.011317333827416102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,fp8,0,0.021242665747801464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,fp8,0,0.03363200028737386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,float16,0,0.03594133257865906
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.015013333410024643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.015216000378131866
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,float16,0,0.029722665747006733
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.035402665535608925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,float16,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,float16,0,0.023215999205907185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,fp8,0,0.025466665625572205
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,float16,0,0.029322666426499683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.03566933423280716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,float16,0,0.01302933320403099
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,float16,0,0.01221866657336553
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,float16,0,0.015135999768972397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,float16,0,0.019189332922299702
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.021967999637126923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,fp8,0,0.03976000100374222
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,fp8,0,0.014271999398867289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.016069332758585613
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,float16,0,0.019285333653291065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,float16,0,0.013221333424250284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.021194666624069214
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,fp8,0,0.012005332857370377
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,float16,0,0.012991999586423239
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,float16,0,0.011296000331640244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,float16,0,0.011317333827416102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,float16,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,float16,0,0.015061333775520325
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,float16,0,0.011109333485364914
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,float16,0,0.01184533288081487
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,fp8,0,0.01202133297920227
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,float16,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,float16,0,0.011301333705584208
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,float16,0,0.015290666371583939
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.011258666714032492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,float16,0,0.013173333058754602
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,float16,0,0.010960000256697336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,float16,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.01116266722480456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.011274666835864386
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,float16,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.011621333658695221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,float16,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,fp8,0,0.011343999455372492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,float16,0,0.02920000006755193
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,float16,0,0.02309333284695943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,fp8,0,0.029680001238981884
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,float16,0,0.019306667149066925
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,fp8,0,0.012309333930412928
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,float16,0,0.023152001202106476
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,float16,0,0.01509333277742068
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.01728533332546552
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,float16,0,0.015050667027632395
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,fp8,0,0.014949332922697067
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,float16,0,0.015141333142916361
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.017610666652520496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.025429333249727886
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,fp8,0,0.01945066700379054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.013418667018413544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,fp8,0,0.011370666325092316
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,float16,0,0.013189333180586496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.013306666165590286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,float16,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,float16,0,0.011253333340088526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,float16,0,0.011087999989589056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,float16,0,0.01101333275437355
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,fp8,0,0.011002667248249054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,float16,0,0.011045332998037338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,float16,0,0.010981333752473196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,float16,0,0.014405333747466406
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,float16,0,0.014058666924635569
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,float16,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,float16,0,0.012954667210578918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,float16,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,float16,0,0.011098666737476984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.02521066615978877
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,float16,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,float16,0,0.010960000256697336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,float16,0,0.01098666712641716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,float16,0,0.011312000453472137
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.01128000020980835
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,float16,0,0.025146665672461193
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,float16,0,0.01922133316596349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,fp8,0,0.025306666890780132
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.01913600042462349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,float16,0,0.011055999745925268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,float16,0,0.017237332959969837
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,float16,0,0.01907733331123988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,float16,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.014293332894643148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,fp8,0,0.011296000331640244
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,float16,0,0.01322666679819425
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.014511999984582266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.0116799995303154
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,float16,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,fp8,0,0.011247999966144562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,float16,0,0.0122079998254776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,float16,0,0.010922666639089584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.011178666104873022
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,float16,0,0.010960000256697336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.011173332730929056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,float16,0,0.010922666639089584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,float16,0,0.011130666981140772
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,fp8,0,0.010911999891201654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,float16,0,0.011226666470368704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,float16,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.011317333827416102
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.01138666644692421
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,float16,0,0.010944000134865442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,float16,0,0.01091733326514562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,float16,0,0.013002666334311167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,float16,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,float16,0,0.011071999867757162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,float16,0,0.010981333752473196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,float16,0,0.011045332998037338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,float16,0,0.011130666981140772
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.019445333629846573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,float16,0,0.01116266722480456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,float16,0,0.01121066634853681
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,0,0.011461333682139715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,0,0.011034666250149408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,float16,0,0.01098666712641716
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,0,0.010885333021481832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,0,0.01090666651725769
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,0,0.009258666386206945
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,0,0.009205333267649015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,0,0.009690666571259499
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,0,0.008874666566650072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.009786666681369146
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,0,0.00897066667675972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,0,0.00903466654320558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,0,0.008986666798591614
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.009125333279371262
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,0,0.011498666057984034
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,0,0.009088000282645226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,0,0.011055999745925268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.00922133338948091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,0,0.009216000015536943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,0,0.008874666566650072
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.00922133338948091
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.009216000015536943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,0,0.00919999989370505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.009338666374484697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,0,0.009216000015536943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.009248000259200731
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,0,0.009509333098928133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,0,0.009136000027259191
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,0,0.008858666444818178
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.009359999870260557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,0,0.008922666932145754
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,0,0.009109333157539368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,0,0.00897066667675972
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,0,0.00914666677514712
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,float16,0,0.3848426739374797
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,0.3492799997329712
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,0,0.008992000172535578
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,0.3487893342971802
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,float16,0,0.2400053342183431
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,0.22609599431355795
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,float16,0,0.23938665787378946
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,float16,0,0.22298133373260498
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,0,0.008997333546479544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.20801067352294922
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.20939199129740396
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,0.233514666557312
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,0,0.008938666433095932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,0.23470399777094522
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,0,0.00903466654320558
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,float16,0,0.18538665771484375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,0.22592000166575113
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,0.17287466923395792
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,float16,0,0.17100799083709717
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,float16,0,0.22323733568191528
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,float16,0,0.2463573416074117
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,float16,0,0.1832266648610433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,float16,0,0.3876853386561076
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,0.17689067125320435
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,0.1773759921391805
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,float16,0,0.15381866693496704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.14630400141080221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.16051733493804932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,float16,0,0.1710240046183268
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.146314670642217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,float16,0,0.14405333002408346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.13588800032933554
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,float16,0,0.14409599701563516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,0.17313067118326822
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,float16,0,0.23849066098531088
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,0.22014933824539185
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,0.2201226751009623
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,float16,0,0.15383999546368918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,float16,0,0.14193600416183472
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.160480002562205
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.1360266705354055
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,float16,0,0.14217600226402283
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.13595199584960938
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.12005866567293803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,float16,0,0.1827253301938375
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,float16,0,0.2384693423906962
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,float16,0,0.1174720029036204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.11133333047231038
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,float16,0,0.11757866541544597
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.11130133271217346
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,float16,0,0.14958932995796204
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,0.1476586659749349
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,float16,0,0.15018666783968607
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,0.14908799529075623
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,float16,0,0.10943466424942017
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,float16,0,0.10859200358390808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,float16,0,0.1260533332824707
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,float16,0,0.12599466244379678
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,float16,0,0.09726933638254802
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.09389866391817729
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,float16,0,0.09706133604049683
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.09435199697812398
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,float16,0,0.09086933732032776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.08685866991678874
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,float16,0,0.09085333347320557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.08701866865158081
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,float16,0,0.16153066356976828
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.15031466881434122
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,float16,0,0.16084800163904825
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.15014400084813437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.13593600193659464
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,float16,0,0.09086933732032776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.11986133456230164
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,float16,0,0.09082133571306865
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.09015466769536336
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.07473599910736084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.07473066449165344
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,float16,0,0.06841599941253662
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.10702932874361674
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.06654933094978333
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,float16,0,0.06878933310508728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,float16,0,0.2412266731262207
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.09071999788284302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.0622026671965917
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,float16,0,0.06459199885527293
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.06204266846179962
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,float16,0,0.07658666869004567
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,float16,0,0.1018399993578593
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.10417067011197408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.10697066783905029
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,float16,0,0.07195733487606049
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,float16,0,0.07145066559314728
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.06630399823188782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.07209066549936931
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,float16,0,0.06447466711203258
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,float16,0,0.06015466650327047
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,float16,0,0.060175999999046326
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,float16,0,0.07672533392906189
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,float16,0,0.05413866539796194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.05402666827042898
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,float16,0,0.10284800330797832
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,float16,0,0.05397333204746246
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.0727040022611618
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.05400000015894572
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,float16,0,0.05192000170548757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,float16,0,0.052042668064435325
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.05009066561857859
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,float16,0,0.1843679944674174
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,float16,0,0.11852266391118367
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.11184533437093098
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,float16,0,0.11772800485293071
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,float16,0,0.06468266745408376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.06822933256626129
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.06806399921576183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.052111998200416565
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,float16,0,0.051557332277297974
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.04982399940490723
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.052298665046691895
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.10558933019638062
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.06002133091290792
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.04401599864164988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.06016000111897787
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.11412266890207927
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.039674667020638786
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,float16,0,0.039749334255854286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.03966933240493139
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,float16,0,0.0498986691236496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.037615999579429626
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.03731200098991394
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,float16,0,0.07548800110816956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,float16,0,0.043663998444875084
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,float16,0,0.043749332427978516
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,float16,0,0.07606933514277141
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.08411733309427898
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,float16,0,0.06417066852251689
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,float16,0,0.03764266769091288
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.05400000015894572
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,float16,0,0.03769599894682566
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,float16,0,0.040720000863075256
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.04307200014591217
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.04326933125654856
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,float16,0,0.03562133262554804
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.08308800061543782
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.043653334180514015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,float16,0,0.03551999976237615
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,float16,0,0.03219199925661087
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.033173332611719765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.053930665055910744
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,float16,0,0.03346666693687439
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,float16,0,0.04074666649103165
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,float16,0,0.03143466760714849
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,float16,0,0.03149333347876867
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.03047466774781545
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.035562666753927864
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,float16,0,0.03990933299064636
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,float16,0,0.09966400265693665
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.09287466605504353
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,float16,0,0.0513973335425059
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,float16,0,0.09858666857083638
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.09527466694513957
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,float16,0,0.05192000170548757
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.033573334415753685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,float16,0,0.037871999045213066
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.03148266673088074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.04179200033346812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.041434665520985924
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,float16,0,0.03156266609827677
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.03281066566705704
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,float16,0,0.031157332162062328
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.035573333501815796
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,float16,0,0.027466667195161183
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.027258666853109997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,float16,0,0.02752000093460083
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.02735999971628189
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.05586666862169901
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.025290665527184803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.055589333176612854
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.025279998779296875
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,float16,0,0.023541333774725597
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.023258666197458904
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,float16,0,0.023605334262053173
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.02333866556485494
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,float16,0,0.05178666611512502
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,float16,0,0.08899199962615967
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.08264000217119853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,float16,0,0.0867733359336853
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,float16,0,0.04541333516438802
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.08572799960772197
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,float16,0,0.02566933383544286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,float16,0,0.045114666223526
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,float16,0,0.037685332198937736
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.03523733218510946
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,float16,0,0.03173333406448364
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,float16,0,0.025018667181332905
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.03296533226966858
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.02735999971628189
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,float16,0,0.025626666843891144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.027482666075229645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,float16,0,0.021183999876181286
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,float16,0,0.05209066470464071
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.021322667598724365
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.04971200227737427
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.02165333429972331
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,float16,0,0.019381333142518997
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.01924266666173935
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,float16,0,0.025434667865435284
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,float16,0,0.01727466657757759
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.018245333184798557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,float16,0,0.017162666966517765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.04990933338801066
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,float16,0,0.017338667064905167
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,float16,0,0.021312000850836437
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,float16,0,0.04182399809360504
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.04683200021584829
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,float16,0,0.04172799984614054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.04737600187460581
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.031744000812371574
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.01791999985774358
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,float16,0,0.027514666318893433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,float16,0,0.03173866619666418
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.03162133445342382
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.0235359991590182
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.023210667073726654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.035504000882307686
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,float16,0,0.01730666682124138
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,float16,0,0.017103999853134155
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.019333332777023315
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,float16,0,0.015637333194414776
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.01732800031701724
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,float16,0,0.028959999481836956
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,float16,0,0.017162666966517765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.015013333410024643
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,float16,0,0.015125333021084467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,float16,0,0.019466667125622433
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.01523200049996376
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,float16,0,0.015263999501864115
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.0191040001809597
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,float16,0,0.015125333021084467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,float16,0,0.015061333775520325
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,float16,0,0.014997333288192749
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,float16,0,0.021216000119845074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,float16,0,0.025546667476495106
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.030048000315825146
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,float16,0,0.025349333882331848
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,float16,0,0.015087999403476715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.02943466603755951
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,float16,0,0.019173332800467808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.021194666624069214
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,float16,0,0.015247999380032221
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,float16,0,0.015087999403476715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.017210666090250015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,float16,0,0.015103999525308609
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,float16,0,0.013189333180586496
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.014122666170199713
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,float16,0,0.013295999417702356
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,float16,0,0.012954667210578918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,float16,0,0.012928000340859095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,float16,0,0.021429332594076794
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,float16,0,0.011999999483426413
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,float16,0,0.01310933381319046
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,float16,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.011663999408483505
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,float16,0,0.019199999670187633
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.021221332252025604
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,float16,0,0.019285333653291065
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,float16,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,float16,0,0.01929066702723503
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.021231998999913532
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.016783999900023144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,float16,0,0.015066667149464289
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,float16,0,0.013007999708255133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.01333333303531011
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,float16,0,0.013114667187134424
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,float16,0,0.011968000481526056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,float16,0,0.011600000162919363
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.012757333616415659
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.021168000996112823
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,float16,0,0.011333333949247995
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,float16,0,0.015109332899252573
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,float16,0,0.011258666714032492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.011231999844312668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.015461333096027374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.013301332791646322
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,float16,0,0.011018666128317514
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.01129066695769628
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,float16,0,0.011066666493813196
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,float16,0,0.011264000087976456
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,float16,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.011989332735538483
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.011215999722480774
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,float16,0,0.015173333386580149
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,float16,0,0.010837333897749582
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,float16,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.013338666409254074
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,float16,0,0.011231999844312668
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.011461333682139715
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,float16,0,0.010944000134865442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,float16,0,0.01515199989080429
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,float16,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,float16,0,0.01118933285276095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,float16,0,0.011039999624093374
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,float16,0,0.011221333096424738
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,float16,0,0.01102399950226148
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,float16,0,0.01110400011142095
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,float16,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,float16,0,0.011061333119869232
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,float16,0,0.011157333850860596
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,float16,0,0.012981332838535309
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,float16,0,0.012965332716703415
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.014229333649079004
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.012159999459981918
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,float16,0,0.011071999867757162
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.011328000575304031
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,float16,0,0.011146667102972666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,float16,0,0.011146667102972666
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,float16,0,0.011557333171367645
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,float16,0,0.010874666273593903
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,float16,0,0.01126933346192042
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,float16,0,0.01097600037852923
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,float16,0,0.011258666714032492
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,float16,0,0.010858666151762009
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.011391999820868174
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.011173332730929056
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,float16,0,0.011002667248249054
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.01109333336353302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,float16,0,0.011141333729028702
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,float16,0,0.010911999891201654
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,float16,0,0.010922666639089584
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,0,0.010954666882753372
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.010714666297038397
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,0,0.010101333260536194
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,0,0.008901333436369896
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,0,0.009925333162148794
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,float16,0,0.013424000392357508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,0,0.009205333267649015
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,0,0.00895999992887179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,0,0.009056000038981438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.009472000102202097
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.009285333255926767
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,0,0.009045333291093508
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,float16,0,0.012997332960367203
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.009248000259200731
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.01108266661564509
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,0,0.009077333534757296
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,0,0.009098666409651438
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.008863999818762144
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,0,0.00902399979531765
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.009183999771873156
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,0,0.008890666688481966
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,0,0.008885333314538002
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,0,0.00895999992887179
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.009061333412925402
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.009248000259200731
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,0,0.008997333546479544
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.009248000259200731
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.16.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.009205333267649015
