framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,fp8,0,19.829055786132812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,float16,0,21.570704142252605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,float16,0,21.571802775065105
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,fp8,0,19.82699203491211
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,float16,0,21.555702209472656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,fp8,0,19.822293599446613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,float16,0,21.576820373535156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,float16,0,10.868789672851562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,fp8,0,19.841205596923828
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,fp8,0,9.990730921427408
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,fp8,0,10.61797841389974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,float16,0,10.842234293619791
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,float16,0,15.777482350667318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,fp8,0,10.661845525105795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,float16,0,10.855519612630209
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,fp8,0,10.635183970133463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,fp8,0,9.99950917561849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,fp8,0,5.051178614298503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,float16,0,10.849589029947916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,float16,0,5.501183827718099
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,fp8,0,5.378437042236328
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,float16,0,6.793210983276367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,float16,0,7.625290552775065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,fp8,0,5.367205301920573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,fp8,0,5.045098622639974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,float16,0,5.491621017456055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,float16,0,5.893349329630534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,fp8,0,5.3656266530354815
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,float16,0,2.8146400451660156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,fp8,0,2.7464160919189453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,float16,0,3.6446078618367515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,fp8,0,2.6845385233561196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,float16,0,2.808629353841146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,fp8,0,2.7454026540120444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,fp8,0,2.580176035563151
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,float16,0,3.413717269897461
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,float16,0,2.8077920277913413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,fp8,0,2.58568541208903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,fp8,0,11.390106201171875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,fp8,0,11.388437906901041
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,float16,0,12.366432189941406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,float16,0,12.37063980102539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,fp8,0,12.21246337890625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,float16,0,17.840539296468098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,float16,0,6.233813603719075
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,fp8,0,5.736026763916016
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,float16,0,6.260021209716797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,float16,0,6.254373550415039
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,float16,0,12.368240356445312
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,fp8,0,12.146661122639975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,fp8,0,6.27296511332194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,float16,0,6.2358824412028
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,fp8,0,5.736682891845703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,fp8,0,5.747823715209961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,float16,0,3.9875094095865884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,fp8,0,5.754453023274739
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,float16,0,3.1768480936686196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,fp8,0,3.115034739176432
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,float16,0,8.199167887369791
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,float16,0,3.86896546681722
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,fp8,0,3.1099414825439453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,float16,0,3.950160026550293
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,fp8,0,3.1115681330362954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,fp8,0,2.9135360717773438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,float16,0,3.168293317159017
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,float16,0,1.9962453842163086
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,fp8,0,1.9149492581685383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,fp8,0,1.5884373982747395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,fp8,0,2.918922742207845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,float16,0,1.6982612609863281
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,fp8,0,1.6166027386983235
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,fp8,0,1.6209653218587239
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,float16,0,1.6343520482381184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,fp8,0,1.6050880750020344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,fp8,0,8.035482406616211
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,float16,0,8.729626973470053
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,float16,0,10.344015757242838
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,fp8,0,8.60751978556315
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,float16,0,1.7380800247192383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,float16,0,8.730154673258463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,fp8,0,8.029365539550781
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,float16,0,1.7001280784606934
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,fp8,0,4.067141215006511
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,float16,0,4.457269350687663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,float16,0,8.73412831624349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,fp8,0,8.597573598225912
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,float16,0,4.411472002665202
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,fp8,0,4.338543891906738
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,float16,0,4.405989329020183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,fp8,0,4.057717323303223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,fp8,0,4.33570671081543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,fp8,0,4.353466669718425
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,fp8,0,2.0683306058247886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,float16,0,5.452725092569987
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,float16,0,5.558890660603841
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,float16,0,2.761103947957357
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,fp8,0,2.071135997772217
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,float16,0,2.254213333129883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,float16,0,2.246234734853109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,fp8,0,2.2087146441141763
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,float16,0,2.405855973561605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,fp8,0,2.323808034261068
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,fp8,0,2.475759983062744
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,float16,0,2.424250602722168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,float16,0,1.3894400596618652
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,fp8,0,1.1443146864573162
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,fp8,0,1.2035573323567708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,float16,0,1.2250293095906575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,fp8,0,1.1463092962900798
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,float16,0,1.1726933320363362
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,float16,0,1.4025492668151855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,fp8,0,1.1522239844004314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,float16,0,1.428426742553711
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,fp8,0,1.1447947025299072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,float16,0,11.359092712402344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,fp8,0,11.21884282430013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,fp8,0,11.251903533935547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,float16,0,11.373296101888021
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,fp8,0,11.018112182617188
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,float16,0,16.179882049560547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,float16,0,11.37182362874349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,fp8,0,5.6467946370442705
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,fp8,0,10.466890970865885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,float16,0,6.882687886555989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,float16,0,5.7458241780598955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,float16,0,8.009066899617514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,fp8,0,5.8376108805338545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,float16,0,5.719674428304036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,fp8,0,5.853375752766927
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,float16,0,5.7188161214192705
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,float16,0,2.908405303955078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,fp8,0,5.365397135416667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,float16,0,3.594122568766276
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,fp8,0,5.282848040262858
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,fp8,0,2.855039914449056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,float16,0,3.512928009033203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,fp8,0,2.854186693827311
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,fp8,0,2.850266774495443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,float16,0,3.5793228149414062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,float16,0,1.4821866353352864
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,fp8,0,2.85694948832194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,float16,0,3.0549866358439126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,float16,0,1.4904160499572754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,fp8,0,1.363269329071045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,fp8,0,1.363493283589681
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,float16,0,1.8235467274983723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,fp8,0,1.5000640551249187
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,fp8,0,2.758533477783203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,float16,0,1.5863572756449382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,fp8,0,1.4747999509175618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,fp8,0,1.460565408070882
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,fp8,0,0.7198987007141113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,fp8,0,0.769221305847168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,float16,0,0.8542453447977701
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,float16,0,0.9394720395406088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,fp8,0,0.7169813315073649
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,float16,0,0.7808533509572347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,fp8,0,0.7414560317993164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,fp8,0,0.768613338470459
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,float16,0,0.7757919629414877
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,float16,0,1.4832639694213867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,float16,0,6.620533625284831
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,float16,0,0.7763360341389974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,fp8,0,6.551530838012695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,fp8,0,6.100986480712891
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,fp8,0,6.104384104410808
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,float16,0,6.6312001546223955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,float16,0,9.107226689656576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,fp8,0,3.075898806254069
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,float16,0,4.2972211837768555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,float16,0,4.208021481831868
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,float16,0,6.631872177124023
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,float16,0,3.3740107218424478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,fp8,0,3.095701217651367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,fp8,0,3.7254721323649087
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,float16,0,3.3493547439575195
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,fp8,0,3.4975627263387046
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,fp8,0,3.2999092737833657
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,float16,0,3.7004639307657876
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,fp8,0,1.5705386797587078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,fp8,0,1.6754773457845051
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,float16,0,1.7136054039001465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,float16,0,1.7187412579854329
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,fp8,0,1.6735733350118
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,fp8,0,1.5639413197835286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,float16,0,1.8718026479085286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,float16,0,1.7059359550476074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,fp8,0,1.5641919771830242
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,fp8,0,6.102816263834636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,float16,0,0.8766293525695801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,fp8,0,0.8846293290456136
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,float16,0,1.0519413153330486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,float16,0,1.0720266501108806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,fp8,0,0.8066613674163818
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,float16,0,0.8835253715515137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,fp8,0,0.8634826342264811
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,float16,0,0.8833440144856771
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,float16,0,2.0442186991373696
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,fp8,0,0.4283039967219035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,float16,0,0.555951992670695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,fp8,0,0.4641546805699666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,float16,0,0.47460798422495526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,fp8,0,0.43113064765930176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,fp8,0,0.46084264914194745
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,fp8,0,0.42903467019399005
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,float16,0,0.5773439804712931
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,fp8,0,0.8066879908243815
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,fp8,0,0.8097546895345052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,float16,0,0.500981330871582
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,float16,0,0.5089173316955566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,fp8,0,5.7812957763671875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,float16,0,8.46456527709961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,float16,0,7.369375864664714
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,fp8,0,6.194847742716472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,float16,0,6.268917083740234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,fp8,0,5.7833811442057295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,float16,0,4.085445404052734
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,float16,0,6.272752126057942
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,fp8,0,3.1271680196126304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,float16,0,3.2156480153401694
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,fp8,0,5.785984039306641
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,float16,0,3.9114561080932617
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,fp8,0,2.909071922302246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,fp8,0,3.118373235066732
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,float16,0,4.067557334899902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,float16,0,3.378016153971354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,float16,0,1.6274933815002441
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,float16,0,1.5964372952779133
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,fp8,0,3.115071932474772
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,fp8,0,1.6274293263753254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,fp8,0,1.5750239690144856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,float16,0,2.211850643157959
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,fp8,0,1.5760213534037273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,fp8,0,1.4723199208577473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,fp8,0,1.601077397664388
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,float16,0,1.9512747128804524
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,fp8,0,2.9841012954711914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,float16,0,0.8858186403910319
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,fp8,0,0.754810651143392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,float16,0,0.8215839862823486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,fp8,0,0.8845439751942953
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,fp8,0,0.764794667561849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,float16,0,0.8200533390045166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,float16,0,0.9747253259023031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,fp8,0,0.7559200127919515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,fp8,0,0.426693320274353
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,float16,0,0.4309120178222656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,fp8,0,0.4283519983291626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,fp8,0,0.3989173173904419
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,float16,0,0.49385066827138263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,float16,0,0.43040533860524494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,fp8,0,0.42531200249989826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,float16,0,0.4313013156255086
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,fp8,0,0.43106667200724286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,float16,0,1.7043733596801758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,float16,0,0.25596799453099567
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,fp8,0,0.218831996122996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,fp8,0,0.23244265715281168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,fp8,0,0.2184320092201233
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,float16,0,0.257968008518219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,fp8,0,0.8364906311035156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,float16,0,0.8329919974009196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,float16,0,0.23736000061035156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,fp8,0,0.23441600799560547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,float16,0,0.5141706864039103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,fp8,0,3.468245188395182
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,float16,0,3.74833075205485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,float16,0,3.9027414321899414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,float16,0,0.26980799436569214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,float16,0,3.753493309020996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,fp8,0,0.23185600837071738
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,fp8,0,3.463285446166992
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,float16,0,3.754826545715332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,float16,0,2.31823460261027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,fp8,0,3.7057441075642905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,fp8,0,1.7629119555155437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,float16,0,1.929082711537679
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,fp8,0,1.8657280604044597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,fp8,0,3.46342404683431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,float16,0,2.258794625600179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,fp8,0,2.0217812856038413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,float16,0,2.0203146934509277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,float16,0,0.2409600019454956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,fp8,0,1.8663253784179688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,fp8,0,0.9561493396759033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,float16,0,1.8933919270833333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,fp8,0,1.7504159609476726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,float16,0,0.9639893372853597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,fp8,0,0.9360000292460123
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,float16,0,0.9811893304189047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,fp8,0,0.9473919868469238
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,float16,0,1.1768853664398193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,fp8,0,0.9511253039042155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,fp8,0,0.9480799833933512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,float16,0,0.9632053375244141
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,float16,0,0.4979039827982585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,fp8,0,0.4880746603012085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,fp8,0,0.45930667718251544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,float16,0,0.5877546469370524
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,float16,0,0.534437338511149
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,fp8,0,0.45955200990041095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,float16,0,0.5975733200709025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,fp8,0,0.4636213382085164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,fp8,0,0.4606986840565999
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,float16,0,0.2675306598345439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,fp8,0,0.2642506758371989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,fp8,0,0.24542399247487387
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,float16,0,0.5077973206837972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,float16,0,1.161146640777588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,fp8,0,0.24753065903981528
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,float16,0,0.2715359926223755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,fp8,0,0.2630133430163066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,float16,0,0.1664426624774933
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,fp8,0,0.14757866660753885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,fp8,0,0.1390613317489624
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,float16,0,0.15965867042541504
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,fp8,0,0.14966400464375815
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,float16,0,0.16156267126401266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,fp8,0,0.14920000235239664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,float16,0,0.15133866667747498
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,fp8,0,0.1365653375784556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,float16,0,0.3073226610819499
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,fp8,0,0.24633065859476724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,float16,0,0.30693332354227704
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,float16,0,3.710346539815267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,float16,0,0.1536906659603119
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,fp8,0,3.432053248087565
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,fp8,0,3.4462718963623047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,fp8,0,3.4348427454630532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,float16,0,4.45526917775472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,float16,0,4.509135882059733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,float16,0,3.9123093287150064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,float16,0,0.2667146722475688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,float16,0,2.209200064341227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,fp8,0,2.2064053217569985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,float16,0,2.2891359329223633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,fp8,0,1.7519520123799641
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,float16,0,1.9182507197062175
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,float16,0,1.8693920771280925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,fp8,0,1.9488159815470378
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,fp8,0,2.0110079447428384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,float16,0,2.2344746589660645
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,float16,0,1.1045280297597249
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,fp8,0,1.9493707021077473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,fp8,0,0.9415787061055502
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,float16,0,1.1203413009643555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,fp8,0,0.8865173657735189
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,float16,0,1.1121973196665447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,fp8,0,0.9411306381225586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,fp8,0,0.9483520189921061
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,float16,0,1.0939679940541585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,float16,0,1.0055840015411377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,float16,0,0.5392373402913412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,float16,0,0.4912000099817912
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,fp8,0,0.9277493158976237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,fp8,0,0.4931199947992961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,fp8,0,3.4419307708740234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,fp8,0,0.4781973361968994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,float16,0,0.5449973344802856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,fp8,0,0.44971732298533124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,float16,0,0.4919786850611369
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,fp8,0,0.2358986735343933
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,fp8,0,0.4545706510543823
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,fp8,0,0.47917866706848145
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,float16,0,0.2924319903055827
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,float16,0,0.3049066662788391
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,fp8,0,0.23616000016530356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,float16,0,0.26070932547251385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,float16,0,0.2925386627515157
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,fp8,0,0.23662932713826498
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,fp8,0,0.14040000240008035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,fp8,0,0.14072533448537192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,float16,0,0.14522133270899454
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,fp8,0,0.1317866643269857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,fp8,0,0.1307413379351298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,float16,0,0.16330132881800333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,float16,0,0.4920106728871663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,float16,0,0.14274666706720987
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,fp8,0,0.14106133580207825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,fp8,0,0.2529866695404053
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,float16,0,0.08642133076985677
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,fp8,0,0.08176533381144206
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,float16,0,0.08802666266759236
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,fp8,0,0.08141333361466725
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,float16,0,0.28996266921361286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,float16,0,0.0897706647713979
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,float16,0,0.09399466713269551
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,fp8,0,0.07853866616884868
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,float16,0,0.15878400206565857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,fp8,0,0.24055999517440796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,fp8,0,2.244069258371989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,fp8,0,0.08250133196512859
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,float16,0,2.310293356577555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,float16,0,0.16078933080037436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,float16,0,2.310464064280192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,fp8,0,2.2579466501871743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,float16,0,2.317039966583252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,fp8,0,2.142629305521647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,float16,0,2.3089812596639
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,float16,0,1.199445327123006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,fp8,0,1.0956640243530273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,float16,0,0.09270933270454407
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,float16,0,1.3964427312215169
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,fp8,0,1.137072006861369
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,float16,0,1.365978717803955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,fp8,0,1.139456033706665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,float16,0,1.315615971883138
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,fp8,0,1.138602654139201
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,fp8,0,2.1467572848002114
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,fp8,0,1.1416266759236653
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,float16,0,0.6794880231221517
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,fp8,0,0.5810133218765259
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,float16,0,0.6312640110651652
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,fp8,0,0.5572533210118612
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,float16,0,0.6461706558863322
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,fp8,0,0.5573386748631796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,float16,0,0.6955040295918783
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,fp8,0,0.07866133252779643
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,float16,0,0.5944693485895792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,float16,0,0.30685333410898846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,fp8,0,0.30408533414204914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,float16,0,0.352453351020813
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,fp8,0,0.28437866767247516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,float16,0,0.34619732697804767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,float16,0,1.1680106321970622
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,fp8,0,0.2895946701367696
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,float16,0,0.35700265566507977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,fp8,0,0.30243732531865436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,float16,0,0.310479998588562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,fp8,0,0.3009546597798665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,fp8,0,0.16335999965667725
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,float16,0,0.1689066688219706
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,float16,0,0.16753600041071573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,fp8,0,0.5827786525090536
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,fp8,0,0.15358933806419373
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,float16,0,0.1916373372077942
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,fp8,0,0.1622666617234548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,fp8,0,0.1544533371925354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,float16,0,0.10207466284434001
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,fp8,0,0.09093333284060161
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,float16,0,0.10206933816274007
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,fp8,0,0.09101866682370503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,fp8,0,0.5792586803436279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,float16,0,0.10710400342941284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,fp8,0,0.08813866972923279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,fp8,0,0.08575999736785889
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,float16,0,0.06428266565004985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,fp8,0,0.052005335688591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,float16,0,0.18573866287867227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,fp8,0,0.0518506666024526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,float16,0,0.06295999884605408
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,fp8,0,0.05193066596984863
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,float16,0,0.16687999169031778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,float16,0,0.056048000852266945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,fp8,0,0.05157333115736643
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,float16,0,0.06217066446940104
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,fp8,0,0.0516480008761088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,float16,0,0.10380267103513081
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,float16,0,0.1011306643486023
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,float16,0,2.433589299519857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,fp8,0,2.364042599995931
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,float16,0,2.431861400604248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,fp8,0,0.16517333189646402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,float16,0,0.06221333146095276
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,fp8,0,2.262394587198893
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,float16,0,2.4371840159098306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,fp8,0,2.262704054514567
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,float16,0,2.441274642944336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,float16,0,1.4122026761372883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,fp8,0,0.09237333138783772
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,fp8,0,2.3621652921040854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,float16,0,1.2642933527628581
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,fp8,0,1.154800017674764
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,fp8,0,1.1887146631876628
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,float16,0,1.2252906958262126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,fp8,0,1.1886773109436035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,float16,0,1.4003252983093262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,fp8,0,1.1881279945373535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,fp8,0,1.1897760232289631
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,fp8,0,0.5766719977060953
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,float16,0,0.7056372960408529
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,float16,0,0.641269326210022
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,fp8,0,0.5854186614354452
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,float16,0,0.6214346488316854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,float16,0,0.6494826475779215
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,fp8,0,0.6069759925206503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,float16,0,0.35525333881378174
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,fp8,0,0.3086026708285014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,fp8,0,0.3102666735649109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,fp8,0,0.3007253408432007
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,float16,0,0.3776693344116211
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,float16,0,0.360368013381958
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,fp8,0,0.311301330725352
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,float16,0,0.32099199295043945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,fp8,0,0.3112693428993225
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,float16,0,0.17747199535369873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,fp8,0,0.15636799732844034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,float16,0,0.7110880215962728
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,fp8,0,0.5782560110092163
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,fp8,0,0.16029333074887595
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,fp8,0,0.1655946671962738
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,float16,0,0.17112533251444498
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,float16,0,0.18062400817871094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,float16,0,0.17520533005396524
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,fp8,0,0.1647040049235026
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,float16,0,0.1816533406575521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,fp8,0,0.16591999928156534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,float16,0,0.3521546522776286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,float16,0,0.10213333368301392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,fp8,0,0.09091732899347942
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,float16,0,1.2286933263142903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,float16,0,0.10042132933934529
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,float16,0,0.11033067107200623
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,fp8,0,0.08846400181452434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,float16,0,0.09599467118581136
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,fp8,0,0.09159466624259949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,fp8,0,0.09230400125185649
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,float16,0,0.06020266811052958
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,fp8,0,0.05137600004673004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,float16,0,0.059605335195859276
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,fp8,0,0.04986133178075155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,fp8,0,0.5768426656723022
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,float16,0,0.054805333415667214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,fp8,0,0.04987733562787374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,float16,0,0.06409599880377452
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,fp8,0,0.049914668003718056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,float16,0,0.05469333132108053
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,float16,0,0.038586666186650596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,fp8,0,0.03422933320204417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,float16,0,0.0379573330283165
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,fp8,0,0.033957332372665405
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,float16,0,0.10427199800809224
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,fp8,0,0.03345600018898646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,float16,0,0.035589332381884255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,float16,0,0.03528533379236857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,float16,0,0.03832533210515976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,fp8,0,0.05124799907207489
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,float16,0,1.9301172892252605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,fp8,0,1.6689759890238445
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,float16,0,1.840127944946289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,fp8,0,0.09193600217501323
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,fp8,0,0.03382399926582972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,fp8,0,0.0348693331082662
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,fp8,0,1.670416037241618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,float16,0,1.809343973795573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,float16,0,0.9800639947255453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,fp8,0,1.7383252779642742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,fp8,0,0.8418827056884766
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,fp8,0,0.848095973332723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,fp8,0,1.7137813568115234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,float16,0,1.809216022491455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,float16,0,0.9110879898071289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,float16,0,0.9340480168660482
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,float16,0,0.9885653654734293
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,fp8,0,0.8417119979858398
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,fp8,0,0.8421546618143717
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,fp8,0,0.43878400325775146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,float16,0,0.9102880160013834
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,float16,0,0.49934399127960205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,fp8,0,0.4340960184733073
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,fp8,0,0.8651626904805502
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,float16,0,0.5235786835352579
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,float16,0,0.4622773329416911
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,float16,0,0.5009653170903524
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,fp8,0,0.4272960027058919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,float16,0,0.25226666529973346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,float16,0,0.23915733893712363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,fp8,0,0.43939733505249023
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,fp8,0,0.2273013393084208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,fp8,0,0.22258667151133218
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,float16,0,0.2453119953473409
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,float16,0,0.2388533353805542
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,fp8,0,0.4424053430557251
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,fp8,0,0.22708799441655478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,fp8,0,0.11693867047627766
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,float16,0,0.23947733640670776
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,fp8,0,0.21990933020909628
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,float16,0,0.12714133659998575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,fp8,0,0.22200000286102295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,float16,0,0.4747519890467326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,fp8,0,0.11992533008257548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,float16,0,0.14990400274594626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,float16,0,0.12838932871818542
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,fp8,0,0.11573333541552226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,fp8,0,0.12153066198031108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,float16,0,0.13182933131853738
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,float16,0,0.0765066643555959
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,float16,0,0.07134933272997539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,fp8,0,0.06633600095907848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,float16,0,0.07106666763623555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,fp8,0,0.06739200154940288
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,fp8,0,0.06419733166694641
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,float16,0,0.07028266787528992
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,fp8,0,0.06454933186372121
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,fp8,0,0.037632000943024956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,float16,0,0.0435146689414978
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,float16,0,0.04098666707674662
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,fp8,0,0.03732266773780187
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,float16,0,0.04748799900213877
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,fp8,0,0.03761066744724909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,fp8,0,0.0374293327331543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,float16,0,0.03965866565704346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,fp8,0,0.03766933331886927
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,float16,0,0.12863999605178833
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,float16,0,0.028058665494124096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,fp8,0,0.02535466601451238
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,fp8,0,0.12318399548530579
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,fp8,0,0.06499200065930684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,float16,0,0.028832000990708668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,fp8,0,0.025413334369659424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,float16,0,0.026911998788515728
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,fp8,0,0.0252960001428922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,float16,0,0.027077332139015198
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,fp8,0,0.02532266577084859
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,float16,0,0.027424000203609467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,float16,0,0.017130666722853977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,fp8,0,0.015274666249752045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,float16,0,0.016832000265518825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,fp8,0,0.015317333241303762
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,float16,0,0.016890666137139004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,fp8,0,0.0161013330022494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,float16,0,0.08467732866605122
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,fp8,0,0.6904959678649902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,float16,0,0.7270560264587402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,float16,0,0.017258666455745697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,float16,0,0.041349334021409355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,float16,0,0.017071999609470367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,float16,0,0.7270399729410807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,fp8,0,0.687328020731608
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,fp8,0,0.6893706321716309
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,float16,0,0.7271573543548584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,float16,0,0.3700053294499715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,fp8,0,0.35178665320078534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,float16,0,0.7405920028686523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,float16,0,0.4452960093816121
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,fp8,0,0.6875999768575033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,fp8,0,0.02502399931351344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,fp8,0,0.34683199723561603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,fp8,0,0.34698665142059326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,float16,0,0.3712746699651082
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,float16,0,0.387445330619812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,fp8,0,0.35199467341105145
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,fp8,0,0.3495733340581258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,float16,0,0.20479466517766318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,float16,0,0.20321067174275717
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,float16,0,0.38740265369415283
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,float16,0,0.19862399498621622
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,fp8,0,0.18228266636530557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,fp8,0,0.17887999614079794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,fp8,0,0.17836799224217734
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,float16,0,0.19561066230138144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,fp8,0,0.1823093295097351
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,fp8,0,0.18205867211023966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,float16,0,0.1053706705570221
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,fp8,0,0.09361066420873006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,fp8,0,0.09425600369771321
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,float16,0,0.1318933367729187
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,float16,0,0.10283733407656352
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,fp8,0,0.09283199906349182
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,fp8,0,0.09460799892743428
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,float16,0,0.05597866574923197
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,fp8,0,0.051669334371884666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,float16,0,0.056015998125076294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,fp8,0,0.05162666738033295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,float16,0,0.05566933254400889
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,fp8,0,0.05170666674772898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,float16,0,0.0727893312772115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,fp8,0,0.05192000170548757
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,fp8,0,0.051674668987592064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,float16,0,0.033557333052158356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,fp8,0,0.03137599925200144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,float16,0,0.03349866718053818
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,float16,0,0.03958933303753535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,float16,0,0.19233600298563638
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,float16,0,0.10204799969991048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,fp8,0,0.03126933425664902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,float16,0,0.03332799921433131
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,fp8,0,0.03256533294916153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,fp8,0,0.03128000100453695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,float16,0,0.03369600077470144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,float16,0,0.020874666670958202
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,fp8,0,0.094842662413915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,fp8,0,0.019194666296243668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,float16,0,0.020901332298914593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,float16,0,0.021151999632517498
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,fp8,0,0.019205333044131596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,fp8,0,0.01889066646496455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,float16,0,0.020821332931518555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,fp8,0,0.01889066646496455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,float16,0,0.021029333273569744
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,float16,0,0.05573866764704386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,float16,0,0.013077333569526672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,float16,0,0.013077333569526672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,float16,0,0.01309866706530253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,fp8,0,0.03146666785081228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,float16,0,0.012944000462690989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,float16,0,0.013088000317414602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,float16,0,0.10146666566530864
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,float16,0,0.012917333592971167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,float16,0,0.01302933320403099
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,float16,0,0.013877333452304205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,float16,0,0.012917333592971167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,float16,0,0.46326398849487305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,fp8,0,0.44576533635457355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,float16,0,0.4636373519897461
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,fp8,0,0.4413386583328247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,fp8,0,0.012757333616415659
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,float16,0,0.4651840130488078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,fp8,0,0.4416266679763794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,float16,0,0.46532265345255536
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,float16,0,0.24038400252660116
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,fp8,0,0.4413173198699951
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,float16,0,0.23808000485102335
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,fp8,0,0.22706133127212524
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,fp8,0,0.22918933629989624
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,float16,0,0.2398293415705363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,fp8,0,0.22627200682957968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,float16,0,0.23829332987467447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,float16,0,0.12441066900889079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,fp8,0,0.22696000337600708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,float16,0,0.012719999998807907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,float16,0,0.12424533565839131
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,float16,0,0.12560000022252402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,fp8,0,0.11779733498891194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,fp8,0,0.11891200145085652
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,fp8,0,0.11958932876586914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,float16,0,0.12575466434160867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,fp8,0,0.11770666639010112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,float16,0,0.12362666924794515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,fp8,0,0.1195093293984731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,float16,0,0.06944533189137776
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,fp8,0,0.22910400231679282
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,fp8,0,0.0642080008983612
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,float16,0,0.06830933193365733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,fp8,0,0.06352533400058746
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,float16,0,0.06795733173688252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,float16,0,0.0672159989674886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,float16,0,0.03738666574160258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,fp8,0,0.035242666800816856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,fp8,0,0.035418666899204254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,float16,0,0.041002665956815086
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,float16,0,0.03724266588687897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,fp8,0,0.035216001172860466
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,fp8,0,0.03531199942032496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,float16,0,0.03753600021203359
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,float16,0,0.037461332976818085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,fp8,0,0.061893333991368614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,float16,0,0.02316266546646754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,float16,0,0.24013332525889078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,fp8,0,0.022309333086013794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,fp8,0,0.06331733365853627
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,float16,0,0.023029332359631855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,fp8,0,0.022965334355831146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,float16,0,0.023007998863856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,fp8,0,0.022309333086013794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,float16,0,0.022970666488011677
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,fp8,0,0.022944000860055287
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,float16,0,0.014864000181357065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,fp8,0,0.014725333700577417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,float16,0,0.014826666563749313
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,fp8,0,0.014805333067973455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,float16,0,0.015061333775520325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,fp8,0,0.03446933378775915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,float16,0,0.014853333433469137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,fp8,0,0.014767999450365702
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,fp8,0,0.014597332725922266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,float16,0,0.015125333021084467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,float16,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,float16,0,0.06821866830190022
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,float16,0,0.010922666639089584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,fp8,0,0.02266666789849599
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,float16,0,0.023711999257405598
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,fp8,0,0.010666667173306147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,float16,0,0.010629333555698395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,fp8,0,0.0643146683772405
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,float16,0,0.010933333386977514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,fp8,0,0.010538666198650995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,float16,0,0.010672000547250112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,fp8,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,float16,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,fp8,0,0.00919999989370505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,float16,0,0.010608000059922537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,fp8,0,0.014912000546852747
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,fp8,0,0.010714666297038397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,float16,0,0.010714666297038397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,float16,0,0.01073066641887029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,float16,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,float16,0,0.39157334963480633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,float16,0,0.3904053370157878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,fp8,0,0.3761813243230184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,float16,0,0.3913866678873698
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,float16,0,0.3911093473434448
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,fp8,0,0.37382400035858154
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,float16,0,0.203274667263031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,fp8,0,0.3754826784133911
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,float16,0,0.20256533225377402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,fp8,0,0.19174933433532715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,float16,0,0.20277865727742514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,fp8,0,0.19572800397872925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,fp8,0,0.19325333833694458
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,fp8,0,0.19214399655659994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,fp8,0,0.3729546864827474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,float16,0,0.10798399647076924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,fp8,0,0.10108799735705058
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,fp8,0,0.10095999638239543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,float16,0,0.10980799794197083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,float16,0,0.20518400271733603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,fp8,0,0.19217065970102945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,fp8,0,0.10106666882832845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,float16,0,0.10620799660682678
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,float16,0,0.2018400033315023
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,fp8,0,0.10105599959691365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,fp8,0,0.1018506685892741
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,float16,0,0.10686399539311726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,float16,0,0.05819199979305267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,fp8,0,0.053770666321118675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,float16,0,0.058320000767707825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,fp8,0,0.05398933092753092
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,float16,0,0.059903999169667564
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,float16,0,0.05778133372465769
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,fp8,0,0.05363733569780985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,float16,0,0.05824000140031179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,fp8,0,0.05399466554323832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,float16,0,0.03325333446264267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,fp8,0,0.030997333427270252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,fp8,0,0.02945599953333537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,fp8,0,0.029274667302767437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,float16,0,0.032816000282764435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,float16,0,0.1072160005569458
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,float16,0,0.031914666295051575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,fp8,0,0.0306986669699351
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,float16,0,0.021151999632517498
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,fp8,0,0.019018666197856266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,float16,0,0.021253332495689392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,fp8,0,0.019066666563351948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,float16,0,0.021125334004561108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,float16,0,0.021226666867733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,fp8,0,0.01918399954835574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,float16,0,0.021018666525681812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,fp8,0,0.018965333700180054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,float16,0,0.013749333719412485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,float16,0,0.014389333625634512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,float16,0,0.01310933381319046
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,float16,0,0.03319466610749563
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,float16,0,0.015109332899252573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,float16,0,0.013162666310866674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,fp8,0,0.03089066594839096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,fp8,0,0.010656000425418219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,float16,0,0.010805333654085795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,fp8,0,0.01211200033624967
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,float16,0,0.010768000036478043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,fp8,0,0.010821333775917688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,fp8,0,0.054042667150497437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,fp8,0,0.00960533320903778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,float16,0,0.011077333241701126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,float16,0,0.010794666906197866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,fp8,0,0.01883200059334437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,fp8,0,0.009226666763424873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,float16,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,float16,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,fp8,0,0.00914666677514712
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,float16,0,0.010415999839703241
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,float16,0,0.03278933217128118
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,float16,0,0.35655466715494794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,fp8,0,0.3408106565475464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,float16,0,0.359279990196228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,fp8,0,0.33663467566172284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,float16,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,float16,0,0.35075732072194415
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,fp8,0,0.3373226722081502
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,float16,0,0.3492213487625122
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,fp8,0,0.3386293252309163
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,float16,0,0.18360000848770142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,fp8,0,0.1760746637980143
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,float16,0,0.18201599518458048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,float16,0,0.18620800971984863
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,fp8,0,0.17527467012405396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,float16,0,0.18636800845464072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,fp8,0,0.17332265774408975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,float16,0,0.010714666297038397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,fp8,0,0.1749173402786255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,float16,0,0.10136533776919048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,fp8,0,0.09081600109736125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,float16,0,0.18336532513300577
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,float16,0,0.09852799773216248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,fp8,0,0.09149866302808125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,float16,0,0.09672533472379048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,fp8,0,0.0909440020720164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,float16,0,0.09701866904894511
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,fp8,0,0.09273599584897359
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,float16,0,0.09774933258692424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,fp8,0,0.049925332268079124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,float16,0,0.05503466725349426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,fp8,0,0.04959466556708018
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,float16,0,0.05409066875775655
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,float16,0,0.0540533314148585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,fp8,0,0.04934933284918467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,fp8,0,0.17384000619252524
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,fp8,0,0.04818133513132731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,float16,0,0.05475200215975443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,float16,0,0.030975999931494396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,float16,0,0.030394665896892548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,fp8,0,0.02886933336655299
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,fp8,0,0.027456000447273254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,float16,0,0.03133866687615713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,fp8,0,0.027045334378878277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,float16,0,0.031221332649389904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,fp8,0,0.027402666707833607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,fp8,0,0.027087998886903126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,float16,0,0.031221332649389904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,float16,0,0.018954666952292126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,fp8,0,0.09082667032877605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,fp8,0,0.018976000448067982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,fp8,0,0.019167999426523846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,fp8,0,0.019071999937295914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,float16,0,0.05573866764704386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,fp8,0,0.018965333700180054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,float16,0,0.01926933353145917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,fp8,0,0.01904533306757609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,fp8,0,0.0491946687301
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,float16,0,0.01403733342885971
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,float16,0,0.012885333349307379
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,float16,0,0.013194666554530462
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,float16,0,0.012805332740147909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,float16,0,0.01309866706530253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,float16,0,0.010266666611035665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,float16,0,0.0100853331387043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,float16,0,0.018885333091020584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,float16,0,0.010640000303586325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,float16,0,0.01097600037852923
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,float16,0,0.010672000547250112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,fp8,0,0.00915733352303505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,float16,0,0.019359999646743137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,float16,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,float16,0,0.01916266605257988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,float16,0,0.009178666397929192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,fp8,0,0.009861333295702934
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,float16,0,0.009130666653315226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,float16,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,fp8,0,0.012741333494583765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,float16,0,0.3466666539510091
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,float16,0,0.34721601009368896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,fp8,0,0.3243199984232585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,fp8,0,0.3243573307991028
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,fp8,0,0.008746666833758354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,float16,0,0.34541332721710205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,fp8,0,0.3242666721343994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,fp8,0,0.3239893317222595
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,float16,0,0.18004266421000162
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,fp8,0,0.16673600673675537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,fp8,0,0.16660799582799277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,float16,0,0.17990400393803915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,fp8,0,0.1665440003077189
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,fp8,0,0.16709866126378378
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,fp8,0,0.16882665952046713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,float16,0,0.09505066275596619
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,float16,0,0.09750933448473613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,fp8,0,0.08841600020726521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,fp8,0,0.08878399928410848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,float16,0,0.18447999159495035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,fp8,0,0.08712533116340637
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,float16,0,0.09522666533788045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,fp8,0,0.08904533584912618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,float16,0,0.09396800398826599
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,fp8,0,0.08658132950464885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,float16,0,0.05384000142415365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,fp8,0,0.04790399968624115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,float16,0,0.0539680023988088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,float16,0,0.18780799706776938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,fp8,0,0.0479360024134318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,float16,0,0.05274133384227753
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,float16,0,0.05470933516820272
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,float16,0,0.05340266724427541
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,fp8,0,0.04964800179004669
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,fp8,0,0.047610665361086525
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,float16,0,0.3465813398361206
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,float16,0,0.031210665901501972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,fp8,0,0.027509334186712902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,float16,0,0.09493866562843323
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,fp8,0,0.02717866748571396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,float16,0,0.031317333380381264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,fp8,0,0.027061333258946735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,float16,0,0.029391999046007793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,fp8,0,0.027136000494162243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,float16,0,0.18006932735443115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,float16,0,0.019167999426523846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,float16,0,0.018992000569899876
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,float16,0,0.01923199991385142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,fp8,0,0.049600000182787575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,fp8,0,0.01915733392039935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,float16,0,0.019194666296243668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,float16,0,0.018901333212852478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,fp8,0,0.01912533367673556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,float16,0,0.013104000439246496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,fp8,0,0.027274665733178455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,fp8,0,0.013237333546082178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,float16,0,0.012837332983811697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,fp8,0,0.012773333738247553
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,float16,0,0.013114667187134424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,float16,0,0.013023999830087027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,float16,0,0.01062400018175443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,fp8,0,0.018160000443458557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,float16,0,0.010693332801262537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,float16,0,0.010970667004585266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,fp8,0,0.008842666943868002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,fp8,0,0.018624000251293182
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,float16,0,0.009893333539366722
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,float16,0,0.009130666653315226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,float16,0,0.03088533381621043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,float16,0,0.009098666409651438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,float16,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,float16,0,0.012863999853531519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,1,128,1,float16,float16,0,0.012586666891972223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,float16,0,0.01080000028014183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,float16,0,0.030778666337331135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,1,128,1,float16,fp8,0,0.0107893335322539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,2,128,1,float16,float16,0,0.018976000448067982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,2,128,1,float16,fp8,0,0.016901332885026932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,float16,0,0.009061333412925402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,0,0.016906666258970898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,0,0.021125334004561108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,0,0.02703999976317088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,0,0.022965334355831146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,1,128,1,float16,float16,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,2,128,1,float16,float16,0,0.012757333616415659
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,1,128,1,float16,fp8,0,0.008576000109314919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,fp8,0,0.07202133536338806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,float16,0,0.09703999757766724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,2,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,0,0.014725333700577417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,0,0.016794666647911072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,fp8,0,0.009072000160813332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,1,128,1,float16,fp8,0,0.008549333239595095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,2,128,1,float16,fp8,0,0.008661333471536636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,2,128,1,float16,float16,0,0.009253333633144697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,float16,0,0.05202133456865946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,fp8,0,0.039488000174363456
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,0,0.011071999867757162
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,0,0.011285333583752314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,0,0.010768000036478043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,1,128,1,float16,float16,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,1,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,2,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,0,0.007925333455204964
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,fp8,0,0.023056000471115112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,0,0.008597333605090777
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,fp8,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,0,0.014874666929244995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,fp8,0,0.014906667172908783
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,1,128,1,float16,float16,0,0.008799999952316284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,2,128,1,float16,float16,0,0.008656000097592672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,2,128,1,float16,fp8,0,0.00850133349498113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,0,0.008757333581646284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,0,0.0069440001000960665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,0,0.00879466657837232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,0,0.006831999868154526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,2,128,1,float16,fp8,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,1,128,1,float16,float16,0,0.008746666833758354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,float16,0,0.0273333340883255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,1,128,1,float16,fp8,0,0.008789333204428354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,2,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,fp8,0,0.010768000036478043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,0,0.007034666836261749
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,0,0.0069919998447100324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,64,1,128,1,float16,float16,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,0,0.007040000210205714
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,0,0.007029333462317784
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,1,128,1,float16,float16,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,float16,0,0.016869333883126576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,64,1,128,1,float16,fp8,0,0.008592000231146812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,1,128,1,float16,fp8,0,0.007637333124876022
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,0,0.007007999966541926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,2,128,1,float16,float16,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,0,0.006874666859706243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,fp8,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,float16,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,0,0.006624000146985054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,0,0.006853333363930385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,1,128,1,float16,fp8,0,0.008565333361426989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,2,128,1,float16,float16,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,2,128,1,float16,fp8,0,0.008592000231146812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,fp8,0,0.00702400008837382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,float16,0,0.008506666868925095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,0,0.006864000111818314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,0,0.006911999856432279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,64,2,128,1,float16,fp8,0,0.008656000097592672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,1,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,float16,0,0.006688000013430913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,1,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,2,128,1,float16,float16,0,0.008816000074148178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,2,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,0,0.0069759997228781385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,0,0.006591999903321266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,0,0.006741333131988843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,1,128,1,float16,float16,0,0.008799999952316284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,float16,0,0.012842666357755661
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,0,0.0069919998447100324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,0,0.006602666651209195
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,2,128,1,float16,float16,0,0.008565333361426989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,0,0.007269333427151044
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,fp8,0,15.111039479573568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,float16,0,16.377413431803387
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,fp8,0,15.130282084147135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,float16,0,16.369834899902344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,float16,0,16.3733647664388
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,fp8,0,15.747589111328125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,fp8,0,7.6304372151692705
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,float16,0,16.37701924641927
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,fp8,0,15.128560384114584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,fp8,0,8.111322402954102
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,float16,0,10.14846420288086
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,float16,0,8.252410888671875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,fp8,0,8.138453165690104
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,float16,0,4.33022403717041
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,float16,0,8.267194747924805
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,fp8,0,3.873114585876465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,fp8,0,7.6167786916097
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,float16,0,8.25714111328125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,fp8,0,8.094149271647135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,fp8,0,3.880869229634603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,float16,0,5.447733561197917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,float16,0,4.191199938456218
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,float16,0,4.192816098531087
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,fp8,0,4.111584027608235
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,float16,0,4.194053332010905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,fp8,0,1.9914026260375977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,fp8,0,3.8842506408691406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,float16,0,2.356698671976725
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,float16,0,2.6135466893514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,float16,0,2.60099204381307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,fp8,0,1.9946773846944172
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,fp8,0,1.9925440152486165
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,float16,0,2.6701812744140625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,fp8,0,2.1621920267740884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,float16,0,2.200709342956543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,fp8,0,4.1119734446207685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,float16,0,8.262720108032227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,float16,0,9.428607940673828
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,fp8,0,2.2405385971069336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,fp8,0,8.723328272501627
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,fp8,0,9.049338658650717
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,float16,0,9.534709294637045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,float16,0,9.432522455851236
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,fp8,0,9.31613858540853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,float16,0,9.443791707356771
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,fp8,0,4.416346549987793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,fp8,0,4.705600102742513
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,float16,0,4.781365394592285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,float16,0,4.766970634460449
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,float16,0,6.343423843383789
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,fp8,0,8.721408208211264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,fp8,0,4.70521608988444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,fp8,0,4.712640126546224
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,float16,0,4.770096143086751
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,float16,0,4.769418716430664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,fp8,0,2.2524213790893555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,float16,0,2.562592029571533
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,float16,0,2.4365973472595215
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,fp8,0,2.4175999959309897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,float16,0,2.9994986852010093
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,fp8,0,2.3975092569986978
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,float16,0,2.434725284576416
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,float16,0,2.436207930246989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,fp8,0,2.44268798828125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,fp8,0,2.2512906392415366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,float16,0,1.5067946116129558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,fp8,0,1.2474666436513264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,fp8,0,1.1748586495717366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,fp8,0,1.2232906818389893
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,float16,0,1.5010026295979817
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,fp8,0,1.364682674407959
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,fp8,0,4.405989329020183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,float16,0,1.348591963450114
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,fp8,0,1.1788746515909831
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,fp8,0,6.6029707590738935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,float16,0,6.688757578531901
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,float16,0,1.275333325068156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,fp8,0,6.176736195882161
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,float16,0,1.2707146803538005
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,float16,0,6.679402669270833
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,fp8,0,6.182522455851237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,float16,0,6.684272130330403
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,fp8,0,3.1281652450561523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,float16,0,4.133717219034831
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,fp8,0,3.338949203491211
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,float16,0,4.148976008097331
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,float16,0,4.0676266352335615
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,fp8,0,6.182474772135417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,float16,0,6.67636235555013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,float16,0,4.11846923828125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,fp8,0,3.7470080057779946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,fp8,0,3.5673653284708657
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,float16,0,2.06769069035848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,fp8,0,1.6068800290425618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,float16,0,1.9472959836324055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,fp8,0,1.961535930633545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,float16,0,1.7796746889750164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,fp8,0,1.6270559628804524
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,float16,0,2.096735954284668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,fp8,0,1.7110506693522136
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,fp8,0,3.2425759633382163
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,float16,0,1.8728319803873699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,fp8,0,1.7874719301859539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,float16,0,1.1451253096262615
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,fp8,0,0.8401652971903483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,float16,0,1.085488001505534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,fp8,0,0.8458879788716634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,fp8,0,0.9045759836832682
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,float16,0,1.079919974009196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,fp8,0,0.9534666538238525
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,float16,0,0.9158773422241211
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,fp8,0,0.8991680145263672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,float16,0,4.229541460673015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,float16,0,0.9121279716491699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,fp8,0,8.453573226928711
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,fp8,0,8.094474792480469
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,float16,0,8.709338506062826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,float16,0,10.128373463948568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,float16,0,9.042597452799479
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,float16,0,11.698436737060547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,float16,0,4.389626820882161
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,fp8,0,4.4945065180460615
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,fp8,0,4.084293365478516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,float16,0,4.4176638921101885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,float16,0,5.761381149291992
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,fp8,0,8.080031712849935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,fp8,0,8.08619753519694
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,fp8,0,4.469818751017253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,fp8,0,4.369920094807942
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,float16,0,2.7169386545817056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,float16,0,4.393402735392253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,fp8,0,4.362309455871582
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,float16,0,2.3798880577087402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,fp8,0,2.2373387018839517
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,float16,0,2.231818675994873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,fp8,0,2.2985706329345703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,float16,0,2.2318347295125327
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,fp8,0,2.2231839497884116
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,float16,0,2.399360020955404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,float16,0,1.3638399442036946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,fp8,0,1.1427199840545654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,fp8,0,1.16866668065389
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,float16,0,4.39197317759196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,fp8,0,1.1407999992370605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,float16,0,1.4024799664815266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,fp8,0,1.16047469774882
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,float16,0,1.4120426177978516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,fp8,0,2.0743093490600586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,float16,0,1.1518719991048176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,fp8,0,1.1637067000071208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,fp8,0,0.6082079807917277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,fp8,0,0.6069226662317911
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,fp8,0,0.6047893365224203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,float16,0,0.6733653545379639
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,fp8,0,0.6026613314946493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,fp8,0,2.0701707204182944
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,float16,0,0.6147040128707886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,fp8,0,0.6035573482513428
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,float16,0,1.1568106810251872
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,float16,0,0.7227359612782797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,float16,0,0.726586659749349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,fp8,0,4.934218724568685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,float16,0,5.320949236551921
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,float16,0,5.109957377115886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,float16,0,5.11403210957845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,fp8,0,5.0846452713012695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,fp8,0,2.570256074269613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,float16,0,3.1693973541259766
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,fp8,0,4.751994768778483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,float16,0,5.116794586181641
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,fp8,0,2.4130080540974936
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,float16,0,2.630645275115967
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,fp8,0,5.094117482503255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,float16,0,0.6344266732533773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,float16,0,2.5841333071390786
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,float16,0,2.5839786529541016
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,fp8,0,3.1719465255737305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,float16,0,2.5865012804667153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,fp8,0,2.5698134104410806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,float16,0,1.5769599278767903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,fp8,0,2.5761653582255044
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,float16,0,1.329093297322591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,fp8,0,1.315386692682902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,fp8,0,1.3127573331197102
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,float16,0,1.6008480389912922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,fp8,0,1.3123946984608967
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,fp8,0,1.311295986175537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,fp8,0,0.6837013562520345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,float16,0,0.6887893676757812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,fp8,0,1.31168532371521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,float16,0,1.6107412974039714
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,fp8,0,0.641706665356954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,fp8,0,0.6828533013661703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,float16,0,0.6890079975128174
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,fp8,0,0.6833706696828207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,float16,0,0.7841493288675944
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,fp8,0,0.638154665629069
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,float16,0,0.3980106512705485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,fp8,0,0.393338680267334
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,float16,0,0.3720373312632243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,float16,0,0.38147199153900146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,float16,0,0.38281067212422687
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,float16,0,0.3733760118484497
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,fp8,0,0.37601598103841144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,float16,0,1.3214293320973713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,float16,0,0.8326133092244467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,float16,0,0.7595626513163248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,fp8,0,0.3460533221562703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,fp8,0,4.863653182983398
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,fp8,0,0.3468960126241048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,float16,0,6.467871983846028
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,fp8,0,0.34513068199157715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,float16,0,5.222757339477539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,fp8,0,4.567359924316406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,float16,0,5.17299747467041
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,float16,0,2.506138642628988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,fp8,0,2.456058661142985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,fp8,0,4.907941182454427
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,fp8,0,2.3126932779947915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,fp8,0,4.5639041264851885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,float16,0,4.884453455607097
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,float16,0,2.6818453470865884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,float16,0,2.946181297302246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,float16,0,2.4623360633850098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,fp8,0,2.310805320739746
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,fp8,0,2.523418744405111
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,fp8,0,2.300325393676758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,float16,0,2.466362635294596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,fp8,0,1.1871786912282307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,fp8,0,1.1693387031555176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,float16,0,1.2687359650929768
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,float16,0,1.3340746561686199
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,fp8,0,1.2557013034820557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,float16,0,1.3342773119608562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,fp8,0,1.24618665377299
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,float16,0,0.7759733200073242
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,float16,0,1.3345279693603516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,fp8,0,1.1698026657104492
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,fp8,0,0.644757350285848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,float16,0,0.7600906689961752
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,fp8,0,0.608730673789978
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,float16,0,0.6560906569163004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,float16,0,0.6470773220062256
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,fp8,0,0.6458293199539185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,float16,0,0.3909813165664673
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,fp8,0,0.3460533221562703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,float16,0,0.3689226706822713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,fp8,0,0.324069341023763
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,float16,0,0.3853066762288411
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,fp8,0,0.3452213207880656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,float16,0,0.34700266520182294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,fp8,0,0.34378667672475177
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,float16,0,0.3454933166503906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,fp8,0,0.321888009707133
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,float16,0,0.21942933400472006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,float16,0,0.2095680038134257
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,float16,0,0.6966826915740967
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,fp8,0,0.18130133549372354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,fp8,0,0.19340799252192178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,float16,0,0.19366933902104697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,fp8,0,0.19572800397872925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,float16,0,0.19392534097035727
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,fp8,0,0.19135467211405435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,float16,0,1.4910613695780437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,float16,0,2.9442081451416016
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,fp8,0,2.8815574645996094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,float16,0,3.4961973826090493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,fp8,0,0.6027093331019083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,float16,0,0.21876267592112222
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,fp8,0,2.765413284301758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,fp8,0,0.18023999532063803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,float16,0,2.9493281046549478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,fp8,0,2.7664801279703775
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,float16,0,3.4737332661946616
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,fp8,0,2.7712319691975913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,float16,0,1.4910292625427246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,fp8,0,1.397989273071289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,float16,0,1.5169973373413086
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,float16,0,1.48909330368042
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,fp8,0,1.4100906054178874
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,fp8,0,1.5725866953531902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,float16,0,1.780687967936198
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,fp8,0,0.6434026559193929
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,fp8,0,1.3984160423278809
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,fp8,0,1.4871679941813152
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,fp8,0,0.7596533298492432
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,float16,0,0.7782080173492432
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,fp8,0,0.7612106800079346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,fp8,0,0.7134239673614502
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,float16,0,0.8063786824544271
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,fp8,0,0.8534613450368246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,float16,0,0.7646133104960123
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,float16,0,0.4300533135732015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,fp8,0,0.7149759928385416
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,float16,0,0.7636640071868896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,fp8,0,0.3758346637090047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,float16,0,0.3980106512705485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,float16,0,0.3982079823811849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,fp8,0,0.3725866476694743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,float16,0,0.875274658203125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,float16,0,1.4932427406311035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,float16,0,0.24453866481781006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,float16,0,0.23925334215164185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,fp8,0,0.2188533345858256
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,float16,0,0.24338666598002115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,fp8,0,0.2179093360900879
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,fp8,0,0.39762131373087567
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,float16,0,0.2363626758257548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,fp8,0,0.21780266364415488
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,fp8,0,0.21776533126831055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,fp8,0,0.39660267035166424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,float16,0,0.13427733381589255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,fp8,0,0.3733866612116496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,fp8,0,0.12489066521326701
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,float16,0,0.13831466436386108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,fp8,0,0.12504000465075174
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,float16,0,0.13819733262062073
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,fp8,0,0.12446932991345723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,float16,0,0.1345866620540619
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,fp8,0,0.11606933673222859
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,fp8,0,0.11552533507347107
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,fp8,0,0.21657600005467734
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,float16,0,3.070138613382975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,float16,0,0.21886932849884033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,float16,0,0.4047093391418457
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,fp8,0,2.7950560251871743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,float16,0,2.956282615661621
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,fp8,0,2.9555253982543945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,float16,0,0.14134933551152548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,float16,0,2.9622720082600913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,float16,0,2.9658454259236655
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,fp8,0,2.8018293380737305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,fp8,0,2.8000052769978843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,float16,0,1.6936373710632324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,fp8,0,1.4106399218241374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,float16,0,1.5234400431315105
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,fp8,0,1.4269013404846191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,float16,0,1.7196159362792969
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,fp8,0,1.4122079213460286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,fp8,0,1.4880107243855794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,float16,0,1.494965394337972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,float16,0,1.4964693387349446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,float16,0,0.760154644648234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,fp8,0,1.4112213452657063
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,fp8,0,0.7561066945393881
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,float16,0,0.7751519680023193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,fp8,0,0.7598559856414795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,fp8,0,0.7569066683451334
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,float16,0,0.7606026331583658
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,float16,0,0.8742612997690836
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,fp8,0,0.7175947030385336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,fp8,0,0.7179360389709473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,float16,0,0.7603519757588705
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,fp8,0,0.3906986713409424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,float16,0,0.39348800977071124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,fp8,0,0.37409067153930664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,float16,0,0.444048007329305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,float16,0,0.4010080099105835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,float16,0,0.4071466525395711
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,fp8,0,0.36955734093983966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,float16,0,0.39371732870737713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,float16,0,0.227567990620931
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,fp8,0,0.21068799495697021
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,float16,0,0.216154674688975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,fp8,0,0.1999786694844564
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,float16,0,0.2093706727027893
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,float16,0,0.2113706668217977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,fp8,0,0.21136534214019775
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,float16,0,0.21146667003631592
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,fp8,0,0.19790933529535928
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,float16,0,0.11877333124478658
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,fp8,0,0.11028266946474712
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,fp8,0,0.10945600271224976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,float16,0,0.4413439830144246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,float16,0,0.11876799662907918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,fp8,0,0.3972853422164917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,float16,0,0.11958400408426921
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,fp8,0,0.11036266883214314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,fp8,0,0.3917280038197835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,float16,0,0.07864533364772797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,fp8,0,0.07197333375612895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,fp8,0,0.1965706745783488
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,fp8,0,0.067071999112765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,float16,0,0.07944533228874207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,fp8,0,0.07102933526039124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,float16,0,0.07069866855939229
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,float16,0,0.07172800103823344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,fp8,0,0.11989866693814595
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,fp8,0,0.11823999881744385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,float16,0,2.050266742706299
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,fp8,0,1.7737226486206055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,float16,0,1.8611733118693035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,float16,0,0.0795253316561381
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,fp8,0,1.8956319491068523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,fp8,0,0.07206933200359344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,float16,0,1.8645119667053223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,fp8,0,0.06589333216349284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,float16,0,1.868010679880778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,fp8,0,1.8588746388753254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,float16,0,1.04530135790507
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,fp8,0,1.8612693150838215
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,fp8,0,0.9077173074086508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,float16,0,0.9646506309509277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,fp8,0,0.9397439956665039
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,fp8,0,0.9401813348134359
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,float16,0,1.064517339070638
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,float16,0,0.9878826936086019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,fp8,0,0.8965226809183756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,fp8,0,0.9399306774139404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,float16,0,0.53438933690389
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,fp8,0,0.48122668266296387
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,fp8,0,0.4815680185953776
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,float16,0,0.4941759904225667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,float16,0,0.5092480182647705
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,fp8,0,0.4814186493555705
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,float16,0,0.11844266454378764
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,float16,0,0.4910506804784139
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,fp8,0,0.4590826829274495
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,float16,0,0.27822933594385785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,fp8,0,0.2537279923756917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,float16,0,0.269978662331899
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,fp8,0,0.24276800950368246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,float16,0,0.27405333518981934
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,float16,0,0.2704586585362752
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,fp8,0,0.25301865736643475
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,float16,0,0.25334932406743366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,fp8,0,0.2515626748402913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,float16,0,1.0757546424865723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,fp8,0,0.13904000322024027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,float16,0,0.1409066617488861
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,float16,0,0.5056639909744263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,float16,0,0.15591999888420105
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,fp8,0,0.12980799873669943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,fp8,0,0.13381333152453104
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,fp8,0,0.4637920061747233
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,float16,0,0.14751999576886496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,fp8,0,0.13181333740552267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,float16,0,0.1393013298511505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,float16,0,0.08100266754627228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,fp8,0,0.0745600014925003
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,fp8,0,0.07622933387756348
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,float16,0,0.080335999528567
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,fp8,0,0.07273066540559132
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,float16,0,0.0827946662902832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,float16,0,0.0857973297437032
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,float16,0,0.0798933357000351
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,fp8,0,0.2516480088233948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,fp8,0,0.07922133306662242
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,float16,0,0.053957333167394005
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,fp8,0,0.049839998284975685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,float16,0,0.05392000079154968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,fp8,0,0.047653332352638245
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,fp8,0,0.048623998959859215
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,float16,0,0.04974933465321859
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,float16,0,0.1518826683362325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,fp8,0,0.04879466692606608
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,float16,0,0.04987200101216634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,fp8,0,0.049642667174339294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,fp8,0,0.13802133003870645
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,fp8,0,0.07459733386834462
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,float16,0,2.0393600463867188
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,fp8,0,1.9159520467122395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,float16,0,2.075904051462809
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,fp8,0,1.9148213068644206
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,float16,0,0.05159999926884969
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,float16,0,2.0041866302490234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,fp8,0,1.91759459177653
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,fp8,0,1.9148586591084797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,float16,0,1.088037331899007
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,float16,0,0.13246933619181314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,float16,0,1.9957547187805176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,float16,0,1.0299306710561116
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,fp8,0,0.9746932983398438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,fp8,0,0.975717306137085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,fp8,0,1.0051146348317463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,float16,0,1.0965920289357503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,fp8,0,1.0039253234863281
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,fp8,0,0.9664479891459147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,float16,0,0.5451573530832926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,float16,0,1.0112640062967937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,fp8,0,0.49643198649088544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,float16,0,0.5330933332443237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,fp8,0,0.49243199825286865
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,fp8,0,0.5144213438034058
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,float16,0,0.5385919809341431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,float16,0,0.5145759979883829
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,fp8,0,0.5146079858144125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,float16,0,0.2884693344434102
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,fp8,0,0.4925280014673869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,fp8,0,0.265669325987498
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,fp8,0,0.2657173275947571
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,float16,0,0.28541332483291626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,fp8,0,0.266975998878479
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,fp8,0,0.2571626702944438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,float16,0,0.2909333308537801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,float16,0,0.27423999706904095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,float16,0,0.26740266879399616
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,float16,0,0.1565600037574768
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,float16,0,0.15311466654141745
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,fp8,0,0.14387733737627664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,float16,0,0.14653866489728293
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,float16,0,0.1515679955482483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,fp8,0,0.14150399963061014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,float16,0,0.1436906655629476
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,float16,0,0.5239200194676717
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,fp8,0,0.14198933045069376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,fp8,0,0.0790826678276062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,float16,0,0.08802133798599243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,float16,0,0.08475733796755473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,fp8,0,0.07778133451938629
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,fp8,0,0.08061333497365315
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,float16,0,0.08576533198356628
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,float16,0,0.0881173312664032
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,fp8,0,0.08066133161385854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,fp8,0,0.07936533292134602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,float16,0,0.08127466837565105
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,float16,0,1.1225333213806152
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,float16,0,0.05136000116666158
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,fp8,0,0.04534933467706045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,fp8,0,0.266213337580363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,fp8,0,0.04756799836953481
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,float16,0,0.055760001142819725
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,float16,0,0.04780266682306925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,fp8,0,0.04785599807898203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,fp8,0,0.04587199787298838
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,float16,0,0.048138668139775596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,float16,0,0.036933332681655884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,fp8,0,0.1379093329111735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,float16,0,0.03731200098991394
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,fp8,0,0.033589333295822144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,fp8,0,0.0334346666932106
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,float16,0,0.03383466601371765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,float16,0,0.037061333656311035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,fp8,0,0.033157333731651306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,fp8,0,0.03136000037193298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,float16,0,0.03346666693687439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,float16,0,0.05251200000445048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,float16,0,1.5524053573608398
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,fp8,0,1.488357384999593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,float16,0,1.508080005645752
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,fp8,0,0.04691733419895172
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,fp8,0,1.4664799372355144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,fp8,0,0.033488000432650246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,fp8,0,0.14357866843541464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,float16,0,0.7814293702443441
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,float16,0,0.778218666712443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,fp8,0,1.5030560493469238
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,fp8,0,0.7435359954833984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,float16,0,1.5368213653564453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,fp8,0,0.7584853172302246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,float16,0,1.5216320355733235
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,fp8,0,1.4662559827168782
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,float16,0,0.7846133708953857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,float16,0,0.7639893690745035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,fp8,0,0.7582826614379883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,float16,0,0.7626399993896484
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,float16,0,0.3885973294576009
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,float16,0,0.38843198617299396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,fp8,0,0.7568960189819336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,fp8,0,0.38701868057250977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,fp8,0,0.37748801708221436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,float16,0,0.3889760176340739
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,fp8,0,0.37913068135579425
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,float16,0,0.40646934509277344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,fp8,0,0.38856534163157147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,float16,0,0.2153493364651998
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,fp8,0,0.3872160116831462
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,fp8,0,0.7579360008239746
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,float16,0,0.21554666757583618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,fp8,0,0.2006666660308838
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,float16,0,0.20612800121307373
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,fp8,0,0.19704532623291016
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,fp8,0,0.20221867163976034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,fp8,0,0.20096532503763834
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,float16,0,0.20273600021998087
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,float16,0,0.20595733324686685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,float16,0,0.10958400368690491
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,fp8,0,0.20207999149958292
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,float16,0,0.3962399959564209
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,float16,0,0.1239466667175293
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,fp8,0,0.10584533214569092
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,float16,0,0.11195199688275655
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,float16,0,0.11763200163841248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,float16,0,0.11268799503644307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,float16,0,0.06528000036875407
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,fp8,0,0.1099626620610555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,float16,0,0.06585599978764851
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,float16,0,0.06404800216356914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,fp8,0,0.06107733150323232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,float16,0,0.06577066580454509
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,float16,0,0.06418666740258534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,fp8,0,0.06083733340104421
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,float16,0,0.039621333281199135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,float16,0,0.037989333271980286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,fp8,0,0.10588799913724263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,fp8,0,0.035760000348091125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,fp8,0,0.1093386709690094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,float16,0,0.03756266583998998
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,float16,0,0.037392000357309975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,float16,0,0.037461332976818085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,fp8,0,0.03515200068553289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,fp8,0,0.06091199815273285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,fp8,0,0.06002666552861532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,fp8,0,0.06141333281993866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,float16,0,0.027322667340437572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,fp8,0,0.02531733363866806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,float16,0,0.025386666258176167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,fp8,0,0.02499199906984965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,float16,0,0.02537599951028824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,fp8,0,0.025242666403452556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,float16,0,0.025413334369659424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,fp8,0,0.02492266645034154
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,fp8,0,0.10612799723943074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,fp8,0,0.035599999129772186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,fp8,0,0.03541333228349686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,fp8,0,0.01728533332546552
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,float16,0,0.017152000218629837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,float16,0,0.01711999997496605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,float16,0,0.027119999130566914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,fp8,0,0.025018667181332905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,fp8,0,0.03549333413441976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,float16,0,0.6291999816894531
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,fp8,0,0.625759998957316
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,float16,0,0.6235040028889974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,float16,0,0.01858666663368543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,fp8,0,0.6268426577250162
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,float16,0,0.6263146797815958
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,fp8,0,0.6188213427861532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,float16,0,0.6248159805933634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,fp8,0,0.6176160176595052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,float16,0,0.3226613402366638
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,float16,0,0.327349325021108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,fp8,0,0.316048006216685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,fp8,0,0.3189013401667277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,fp8,0,0.31989334026972455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,float16,0,0.3245866696039836
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,float16,0,0.017968000223239262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,float16,0,0.33371734619140625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,fp8,0,0.3148533304532369
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,float16,0,0.17277334133783975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,float16,0,0.3183786670366923
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,fp8,0,0.1635040044784546
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,float16,0,0.1727893352508545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,fp8,0,0.3138879934946696
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,fp8,0,0.16247999668121338
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,float16,0,0.1686346729596456
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,fp8,0,0.16528000434239706
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,float16,0,0.16455466548601785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,float16,0,0.1681493322054545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,fp8,0,0.16245866815249124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,float16,0,0.09410132964452107
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,float16,0,0.017935999979575474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,float16,0,0.09392000238100688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,float16,0,0.09297600388526917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,fp8,0,0.08983467022577922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,fp8,0,0.08880000313123067
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,float16,0,0.09309867024421692
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,fp8,0,0.08956799904505412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,fp8,0,0.09087999661763509
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,fp8,0,0.08867200215657552
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,float16,0,0.055018668373425804
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,fp8,0,0.050213331977526345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,float16,0,0.05390933156013489
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,fp8,0,0.04978133241335551
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,fp8,0,0.052058666944503784
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,float16,0,0.05472533404827118
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,float16,0,0.05576533575852712
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,fp8,0,0.04991999765237173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,float16,0,0.05604266623655955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,fp8,0,0.16160533825556436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,fp8,0,0.05201066533724467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,float16,0,0.03124266614516576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,fp8,0,0.029274667302767437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,fp8,0,0.029418667157491047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,float16,0,0.03143999973932902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,float16,0,0.030495998760064442
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,fp8,0,0.029445332785447437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,fp8,0,0.016794666647911072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,float16,0,0.029520000020662945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,fp8,0,0.02923733244339625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,fp8,0,0.021322667598724365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,fp8,0,0.021536000072956085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,float16,0,0.09174933036168416
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,fp8,0,0.021253332495689392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,float16,0,0.023002666731675465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,fp8,0,0.021045332153638203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,float16,0,0.022645334402720135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,fp8,0,0.021274665991465252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,float16,0,0.015066667149464289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,float16,0,0.014725333700577417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,fp8,0,0.014842666685581207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,fp8,0,0.014767999450365702
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,float16,0,0.029311999678611755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,float16,0,0.015040000279744467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,float16,0,0.014789332946141561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,float16,0,0.014736000448465347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,fp8,0,0.02918400118748347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,float16,0,0.014789332946141561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,float16,0,0.014783999572197596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,float16,0,0.014752000570297241
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,float16,0,0.022704000274340313
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,float16,0,0.014826666563749313
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,fp8,0,0.013898666948080063
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,float16,0,0.01310933381319046
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,float16,0,0.02126399924357732
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,fp8,0,0.38450666268666583
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,float16,0,0.3900853395462036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,fp8,0,0.3856853246688843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,float16,0,0.38939201831817627
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,float16,0,0.39193065961201984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,fp8,0,0.38571735223134357
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,float16,0,0.3895253340403239
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,fp8,0,0.19805866479873657
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,float16,0,0.20257065693537393
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,fp8,0,0.19724265734354654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,float16,0,0.02130666623512904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,float16,0,0.19981332619984946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,float16,0,0.20253866910934448
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,fp8,0,0.19951999187469482
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,float16,0,0.105103999376297
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,float16,0,0.20380266507466635
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,fp8,0,0.19702933231989542
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,fp8,0,0.10288000106811523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,float16,0,0.10724266370137532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,fp8,0,0.1034453312555949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,fp8,0,0.10292800267537434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,float16,0,0.10652800401051839
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,fp8,0,0.10287466645240784
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,fp8,0,0.10287466645240784
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,fp8,0,0.38516799608866376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,float16,0,0.20587199926376343
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,float16,0,0.05860800047715505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,float16,0,0.062133332093556724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,float16,0,0.059877331058184304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,fp8,0,0.05629333357016245
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,fp8,0,0.05773333211739858
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,fp8,0,0.057205334305763245
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,float16,0,0.05956799785296122
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,fp8,0,0.057461331288019814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,float16,0,0.060138667623202004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,fp8,0,0.19941866397857666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,float16,0,0.034261333445707955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,fp8,0,0.03347733368476232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,float16,0,0.0339626669883728
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,fp8,0,0.033402666449546814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,fp8,0,0.03347733368476232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,float16,0,0.03341866781314214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,fp8,0,0.033488000432650246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,float16,0,0.033941333492596946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,fp8,0,0.03325333446264267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,float16,0,0.021013334393501282
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,float16,0,0.10523200035095215
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,fp8,0,0.020842666427294414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,fp8,0,0.020986666282018025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,fp8,0,0.021327999730904896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,float16,0,0.02090666691462199
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,float16,0,0.021274665991465252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,float16,0,0.0210506667693456
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,fp8,0,0.021231998999913532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,float16,0,0.020975999534130096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,float16,0,0.015050667027632395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,fp8,0,0.05659199754397074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,fp8,0,0.014805333067973455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,float16,0,0.035455999275048576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,float16,0,0.107232004404068
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,fp8,0,0.014837333311637243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,float16,0,0.015087999403476715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,float16,0,0.011061333119869232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,float16,0,0.011039999624093374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,float16,0,0.01102399950226148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,float16,0,0.010885333021481832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,fp8,0,0.010778666784365972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,float16,0,0.010768000036478043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,float16,0,0.010602666685978571
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,float16,0,0.015082667271296183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,float16,0,0.010645333677530289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,float16,0,0.015178666760524115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,float16,0,0.010650667051474253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,fp8,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,float16,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,fp8,0,0.0107893335322539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,float16,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,fp8,0,0.010656000425418219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,fp8,0,0.020879998803138733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,float16,0,0.3079199989636739
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,fp8,0,0.30353599786758423
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,float16,0,0.3097760081291199
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,fp8,0,0.015285332997639975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,fp8,0,0.01080000028014183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,float16,0,0.3078346649805705
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,fp8,0,0.3025706609090169
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,fp8,0,0.3040906588236491
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,float16,0,0.31069332361221313
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,float16,0,0.16555733482042947
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,fp8,0,0.15844266613324484
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,fp8,0,0.15574399630228677
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,fp8,0,0.3017759919166565
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,fp8,0,0.15677332878112793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,float16,0,0.16389333208402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,float16,0,0.015205333630243937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,fp8,0,0.15685866276423135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,float16,0,0.1609226663907369
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,float16,0,0.16086933016777039
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,float16,0,0.08616532882054646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,fp8,0,0.1562933325767517
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,float16,0,0.08614400029182434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,float16,0,0.0869813362757365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,fp8,0,0.08061866462230682
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,float16,0,0.08543999989827473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,fp8,0,0.08210133512814839
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,float16,0,0.08561600248018901
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,fp8,0,0.08385066191355388
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,fp8,0,0.0839413305123647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,fp8,0,0.08136533200740814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,float16,0,0.04774933556715647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,float16,0,0.16478400429089865
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,float16,0,0.04790933430194855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,float16,0,0.04724800089995066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,fp8,0,0.04558933277924856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,float16,0,0.04665599763393402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,float16,0,0.04836800197760264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,fp8,0,0.04584000011285146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,fp8,0,0.04566933214664459
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,fp8,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,fp8,0,0.045925334095954895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,fp8,0,0.027461332579453785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,float16,0,0.02923733244339625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,fp8,0,0.027434666951497395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,float16,0,0.028688001135985058
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,float16,0,0.028832000990708668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,fp8,0,0.027386667827765148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,float16,0,0.02831999957561493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,float16,0,0.029205332199732464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,fp8,0,0.027445333699385326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,fp8,0,0.017045332739750545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,float16,0,0.017082666357358296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,float16,0,0.018816000471512478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,fp8,0,0.016837333639462788
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,float16,0,0.017290666699409485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,float16,0,0.017279999951521557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,fp8,0,0.016938666502634685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,fp8,0,0.04598933458328247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,float16,0,0.013503999759753546
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,fp8,0,0.013381333400805792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,float16,0,0.01351999988158544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,float16,0,0.013546666751305262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,float16,0,0.013104000439246496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,fp8,0,0.027402666707833607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,float16,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,fp8,0,0.010661333799362183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,float16,0,0.010602666685978571
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,fp8,0,0.01033599985142549
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,fp8,0,0.010666667173306147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,float16,0,0.010650667051474253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,float16,0,0.01899733394384384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,float16,0,0.010762666662534079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,fp8,0,0.010794666906197866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,fp8,0,0.01722666621208191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,float16,0,0.009354666496316591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,float16,0,0.009082666908701261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,float16,0,0.012960000584522883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,float16,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,float16,0,0.27005332708358765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,float16,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,fp8,0,0.26588799556096393
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,float16,0,0.26867733399073285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,fp8,0,0.009925333162148794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,float16,0,0.010746666540702185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,fp8,0,0.2628213365872701
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,fp8,0,0.26494399706522626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,float16,0,0.27847466866175336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,float16,0,0.009205333267649015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,fp8,0,0.2626240054766337
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,float16,0,0.14630400141080221
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,fp8,0,0.13596799969673157
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,fp8,0,0.13949333628018698
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,fp8,0,0.13858133554458618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,fp8,0,0.13733333349227905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,float16,0,0.14542399843533835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,fp8,0,0.1371999979019165
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,float16,0,0.14085867007573447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,float16,0,0.07850133379300435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,fp8,0,0.07213333249092102
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,float16,0,0.07980266710122426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,float16,0,0.07453866799672444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,float16,0,0.2681279977162679
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,float16,0,0.07859200239181519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,fp8,0,0.07237333556016286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,float16,0,0.07447466750939687
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,fp8,0,0.07291199763615926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,float16,0,0.04208533465862274
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,fp8,0,0.041077333192030586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,fp8,0,0.039621333281199135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,float16,0,0.04177066683769226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,fp8,0,0.041536000867684685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,float16,0,0.043466667334238686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,float16,0,0.14382400115331015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,float16,0,0.04318933188915253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,fp8,0,0.0397119993964831
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,float16,0,0.043477331598599754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,float16,0,0.025397333006064098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,fp8,0,0.040762667854626976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,fp8,0,0.025114665428797405
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,float16,0,0.025349333882331848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,fp8,0,0.024986666937669117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,float16,0,0.027024000883102417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,float16,0,0.02629866699377696
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,float16,0,0.025311999022960663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,fp8,0,0.025087999800841015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,float16,0,0.01714133347074191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,fp8,0,0.016688000410795212
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,fp8,0,0.07257066667079926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,float16,0,0.016858667135238647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,float16,0,0.01718933383623759
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,fp8,0,0.07253333429495494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,float16,0,0.016970666746298473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,float16,0,0.017221332838137943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,float16,0,0.14172800381978354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,float16,0,0.016458666572968166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,fp8,0,0.025258667767047882
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,float16,0,0.012906666845083237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,fp8,0,0.012576000144084295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,float16,0,0.013221333424250284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,float16,0,0.01292266696691513
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,fp8,0,0.01653333380818367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,fp8,0,0.009141333401203156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,float16,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,float16,0,0.00916800027092298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,fp8,0,0.008837333569924036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,fp8,0,0.009125333279371262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,float16,0,0.009072000160813332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,fp8,0,0.009205333267649015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,fp8,0,0.02533866713444392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,fp8,0,0.012703999876976013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,float16,0,0.009178666397929192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,float16,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,float16,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,fp8,0,0.009072000160813332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,float16,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,fp8,0,0.016399999459584553
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,float16,0,0.009658666948477427
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,float16,0,0.27341334025065106
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,float16,0,0.26290132602055866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,fp8,0,0.2485546668370565
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,fp8,0,0.24643733104070029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,float16,0,0.012944000462690989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,fp8,0,0.24706133206685385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,float16,0,0.26095465819040936
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,fp8,0,0.2464639941851298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,float16,0,0.13893333077430725
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,float16,0,0.14041067163149515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,fp8,0,0.1288053294022878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,fp8,0,0.12954666217168173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,fp8,0,0.1283680001894633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,fp8,0,0.1279039978981018
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,float16,0,0.13803199927012125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,float16,0,0.07427200178305308
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,fp8,0,0.12811199824015299
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,float16,0,0.2609226703643799
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,fp8,0,0.06809600194295247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,float16,0,0.07542400062084198
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,float16,0,0.07515199979146321
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,fp8,0,0.06833066542943318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,float16,0,0.009045333291093508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,float16,0,0.0724533349275589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,fp8,0,0.06830400228500366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,float16,0,0.07339733342329662
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,fp8,0,0.0681279997030894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,float16,0,0.043231998880704246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,fp8,0,0.037632000943024956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,float16,0,0.041840001940727234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,fp8,0,0.03941866755485535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,fp8,0,0.03765333443880081
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,float16,0,0.04363200068473816
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,fp8,0,0.0376800000667572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,fp8,0,0.03738133360942205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,float16,0,0.04132800052563349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,float16,0,0.1399946709473928
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,float16,0,0.02510933329661687
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,fp8,0,0.02390933285156886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,float16,0,0.026176000634829204
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,fp8,0,0.023077333966890972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,float16,0,0.026015999416510265
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,float16,0,0.13661332925160727
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,float16,0,0.0252960001428922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,fp8,0,0.024666666984558105
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,float16,0,0.01681600014368693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,fp8,0,0.0680266668399175
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,fp8,0,0.015018666783968607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,fp8,0,0.014815999815861383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,fp8,0,0.015018666783968607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,float16,0,0.016821333517630894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,fp8,0,0.014848000059525171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,float16,0,0.016688000410795212
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,fp8,0,0.01482133318980535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,float16,0,0.04135466615358988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,float16,0,0.012831999609867731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,float16,0,0.012757333616415659
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,float16,0,0.012970666090647379
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,fp8,0,0.023370665808518726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,fp8,0,0.02334933231274287
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,float16,0,0.02502399931351344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,float16,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,fp8,0,0.009152000149091085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,float16,0,0.00996800015370051
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,float16,0,0.01703466723362605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,float16,0,0.009072000160813332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,float16,0,0.008874666566650072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,float16,0,0.009162666896979014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,fp8,0,0.009962666779756546
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,float16,0,0.008869333192706108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,float16,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,float16,0,0.012719999998807907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,float16,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,float16,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,1,128,1,float16,float16,0,0.012778667112191519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,1,128,1,float16,fp8,0,0.010751999914646149
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,float16,0,0.016730666160583496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,2,128,1,float16,float16,0,0.019253333409627277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,2,128,1,float16,fp8,0,0.01687466725707054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,0,0.02125866711139679
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,float16,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,0,0.02309333284695943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,0,0.027130665878454845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,1,128,1,float16,float16,0,0.008757333581646284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,fp8,0,0.008634666601816813
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,2,128,1,float16,float16,0,0.012842666357755661
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,float16,0,0.07487466434637706
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,2,128,1,float16,fp8,0,0.010954666882753372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,0,0.01488000030318896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,0,0.017045332739750545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,0,0.014725333700577417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,1,128,1,float16,float16,0,0.008842666943868002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,1,128,1,float16,fp8,0,0.008618666479984919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,2,128,1,float16,float16,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,float16,0,0.04009066770474116
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,fp8,0,0.031285333136717476
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,0,0.010890666395425797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,0,0.012709333250919977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,1,128,1,float16,float16,0,0.008623999853928884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,1,128,1,float16,fp8,0,0.008538666491707167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,fp8,0,0.019194666296243668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,float16,0,0.0222080002228419
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,2,128,1,float16,float16,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,2,128,1,float16,fp8,0,0.008863999818762144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,1,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,0,0.006858666737874349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,0,0.009216000015536943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,0,0.008778666456540426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,1,128,1,float16,float16,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,1,128,1,float16,fp8,0,0.008650666723648706
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,2,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,0,0.0069866664707660675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,2,128,1,float16,fp8,0,0.008799999952316284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,0,0.006927999978264173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,0,0.008687999720374743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,0,0.007055999711155891
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,1,128,1,float16,float16,0,0.008559999987483025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,1,128,1,float16,fp8,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,float16,0,0.010954666882753372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,2,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,0,0.007205333560705185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,0,0.007247999931375186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,0,0.00707733320693175
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,0,0.006965333595871925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,fp8,0,0.05600533386071523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,1,128,1,float16,float16,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,1,128,1,float16,fp8,0,0.008586666857202848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,float16,0,0.008634666601816813
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,2,128,1,float16,fp8,0,0.008714666590094566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,2,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,0,0.0074506668994824094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,float16,0,0.014901333798964819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,0,0.009119999905427298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,0,0.006895999734600385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,0,0.006821333120266597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,0,0.006640000268816948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,1,128,1,float16,float16,0,0.008559999987483025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,1,128,1,float16,fp8,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,2,128,1,float16,fp8,0,0.008618666479984919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,0,0.006895999734600385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,float16,0,0.00701333334048589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,0,0.006837333242098491
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,48,2,128,1,float16,float16,0,0.008645333349704742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,0,0.006634666894872983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,1,128,1,float16,fp8,0,0.006821333120266597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,2,128,1,float16,float16,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,2,128,1,float16,fp8,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,fp8,0,0.006864000111818314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,float16,0,0.007093333328763644
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,0,0.00696000022192796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,0,0.006965333595871925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,48,2,128,1,float16,float16,0,0.00884799969693025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,fp8,0,0.012730666746695837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,2,128,1,float16,float16,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,0,0.0069919998447100324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,0,0.006698666761318843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,0,0.006575999781489372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,float16,0,13.715476989746094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,fp8,0,12.663455963134766
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,float16,0,13.708661397298178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,fp8,0,12.678362528483072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,float16,0,13.717488606770834
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,fp8,0,13.342997233072916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,float16,0,13.714847564697266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,fp8,0,12.6714235941569
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,float16,0,6.93942387898763
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,fp8,0,6.798245112101237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,float16,0,6.9242509206136065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,fp8,0,6.795461018880208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,fp8,0,6.925061543782552
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,fp8,0,6.424746831258138
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,float16,0,6.926991780598958
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,fp8,0,6.418826421101888
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,float16,0,3.5356054306030273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,float16,0,9.525189081827799
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,fp8,0,3.270928064982096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,float16,0,9.599258422851562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,float16,0,4.256848017374675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,fp8,0,3.3603785832722983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,fp8,0,3.463616053263346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,float16,0,4.3417971928914385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,float16,0,4.400176048278809
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,float16,0,1.955567995707194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,fp8,0,3.4589014053344727
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,float16,0,4.314874649047852
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,fp8,0,2.043615976969401
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,fp8,0,1.6873547236124675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,float16,0,1.9453546206156414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,fp8,0,1.776261329650879
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,float16,0,1.8411733309427898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,fp8,0,1.9363199869791667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,float16,0,2.230682690938314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,fp8,0,1.685914675394694
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,fp8,0,4.027914683024089
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,fp8,0,7.327327728271484
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,float16,0,2.307957331339518
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,fp8,0,7.412911732991536
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,fp8,0,7.3206024169921875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,float16,0,8.440885543823242
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,float16,0,10.01202646891276
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,float16,0,7.914384206136067
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,float16,0,4.020928064982097
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,float16,0,4.262672106424968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,fp8,0,3.9583307902018228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,fp8,0,4.004368146260579
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,float16,0,7.925120035807292
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,float16,0,5.307813326517741
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,fp8,0,7.980469385782878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,fp8,0,3.952586809794108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,fp8,0,3.8843199412027993
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,float16,0,4.9423573811848955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,float16,0,4.61411730448405
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,float16,0,2.5391359329223633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,float16,0,2.061194737752279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,fp8,0,2.2618719736735025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,fp8,0,2.021018664042155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,float16,0,2.0688907305399575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,fp8,0,2.6018932660420737
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,float16,0,2.054485321044922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,fp8,0,2.3863840103149414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,fp8,0,2.0130772590637207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,float16,0,2.498143990834554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,float16,0,1.08024001121521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,float16,0,1.2864533265431721
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,fp8,0,1.0611146291097004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,float16,0,1.1414133707682292
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,fp8,0,1.077082633972168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,float16,0,1.2917226950327556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,float16,0,1.0962933699289958
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,fp8,0,1.0592959721883137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,fp8,0,3.966848055521647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,fp8,0,5.441461563110352
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,fp8,0,0.9980853398640951
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,float16,0,7.658517201741536
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,fp8,0,5.5524851481119795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,float16,0,5.605637232462565
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,float16,0,7.407072067260742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,fp8,0,5.600079854329427
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,fp8,0,1.0619786580403645
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,float16,0,2.845088005065918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,fp8,0,3.220698674519857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,fp8,0,2.8188959757486978
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,float16,0,3.40559450785319
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,float16,0,6.239621480305989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,fp8,0,2.814885457356771
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,float16,0,3.705024083455404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,float16,0,2.846714655558268
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,fp8,0,3.0157388051350913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,float16,0,3.017792065938314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,fp8,0,2.7993812561035156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,fp8,0,1.3674346605936687
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,float16,0,1.5079360008239746
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,float16,0,1.7559946378072102
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,fp8,0,1.4852746327718098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,fp8,0,1.5205119450887044
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,float16,0,1.8014133771260579
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,fp8,0,1.4782506624857585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,float16,0,1.4661280314127605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,float16,0,1.7872053782145183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,float16,0,0.7829919656117758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,float16,0,0.8277440071105957
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,fp8,0,0.7665812969207764
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,float16,0,0.9264907042185465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,fp8,0,5.193626721700032
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,fp8,0,0.765397310256958
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,fp8,0,0.7660426298777262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,fp8,0,0.7651360034942627
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,float16,0,0.788149356842041
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,fp8,0,1.4483839670817058
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,fp8,0,0.8088586330413818
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,float16,0,0.859498659769694
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,fp8,0,7.259679794311523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,float16,0,7.312496185302734
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,fp8,0,6.798933029174805
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,float16,0,7.3176320393880205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,float16,0,10.197898864746094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,fp8,0,3.430405298868815
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,fp8,0,7.271546681722005
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,float16,0,4.733248074849446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,fp8,0,3.439455986022949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,float16,0,3.7141974767049155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,float16,0,4.800378799438477
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,fp8,0,6.795834859212239
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,fp8,0,3.4307521184285483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,float16,0,10.246986389160156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,float16,0,3.69702943166097
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,fp8,0,4.012399991353353
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,fp8,0,3.668095906575521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,float16,0,4.774250666300456
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,float16,0,2.009221394856771
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,fp8,0,1.7513866424560547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,float16,0,2.21670929590861
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,fp8,0,2.3282292683919272
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,fp8,0,1.9243946075439453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,float16,0,2.012506643931071
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,float16,0,0.9866666793823242
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,fp8,0,2.1382452646891275
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,float16,0,1.8834506670633953
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,fp8,0,1.8697706858317058
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,fp8,0,0.9901706377665201
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,float16,0,0.9830719629923502
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,fp8,0,1.037717342376709
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,float16,0,1.1523466904958088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,fp8,0,0.9056746959686279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,float16,0,1.0818026860555012
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,fp8,0,0.9073546727498373
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,float16,0,0.5630720059076945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,fp8,0,0.5361013412475586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,float16,0,0.5254186789194742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,float16,0,0.5360480149586996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,fp8,0,0.48629868030548096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,fp8,0,0.4842453400293986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,float16,0,0.6203680038452148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,fp8,0,0.48573867479960126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,fp8,0,0.48556800683339435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,float16,0,2.287183920542399
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,float16,0,1.0024853547414143
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,fp8,0,4.019162813822429
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,float16,0,4.302624066670735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,float16,0,5.5787309010823565
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,float16,0,4.306415875752767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,float16,0,0.6232586701711019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,fp8,0,4.287610689798991
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,float16,0,4.729498545328776
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,float16,0,2.363818645477295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,fp8,0,4.012853304545085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,float16,0,2.6492907206217446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,fp8,0,2.168933391571045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,float16,0,2.6711947123209634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,float16,0,2.180895964304606
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,fp8,0,2.188762664794922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,fp8,0,2.3278932571411133
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,float16,0,2.181392033894857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,float16,0,1.1265546480814617
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,fp8,0,2.6208747227986655
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,fp8,0,1.0443466504414876
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,float16,0,1.2616853713989258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,fp8,0,1.1131733258565266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,fp8,0,2.0380853017171225
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,fp8,0,1.1792960166931152
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,fp8,0,1.0560213724772136
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,float16,0,1.3161280155181885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,float16,0,1.1191786924997966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,fp8,0,1.0408960183461506
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,float16,0,0.675551970799764
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,fp8,0,0.5873119831085205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,fp8,0,0.5486933390299479
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,float16,0,0.6304213205973307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,fp8,0,4.223109245300293
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,fp8,0,0.583903988202413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,fp8,0,0.547216018040975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,float16,0,0.6275093158086141
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,float16,0,0.587658683458964
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,fp8,0,0.5846613248189291
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,float16,0,0.35178665320078534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,fp8,0,0.3200533390045166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,float16,0,0.3269973397254944
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,fp8,0,0.30060267448425293
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,fp8,0,0.3199626604715983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,float16,0,1.3515893618265789
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,float16,0,0.35228800773620605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,fp8,0,0.3198666572570801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,float16,0,0.3232693274815877
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,fp8,0,0.9244213104248047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,fp8,0,0.3219093283017476
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,float16,0,0.6007253328959147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,fp8,0,3.8589919408162436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,float16,0,4.117242813110352
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,fp8,0,4.120496114095052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,float16,0,4.778186798095703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,float16,0,4.1229705810546875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,float16,0,0.3595893383026123
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,fp8,0,4.11628786722819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,float16,0,4.214271863301595
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,float16,0,2.5684266090393066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,fp8,0,2.081066608428955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,float16,0,2.3740320205688477
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,fp8,0,1.9661653836568196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,float16,0,2.108954588572184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,fp8,0,3.8778772354125977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,float16,0,2.515600045522054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,fp8,0,2.418432076772054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,float16,0,1.0911306540171306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,fp8,0,2.029557387034098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,float16,0,2.582592010498047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,fp8,0,0.9985067049662272
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,fp8,0,1.0585386753082275
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,float16,0,1.1645546754201253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,fp8,0,1.0209279855092366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,float16,0,1.0839040279388428
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,fp8,0,1.246128002802531
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,float16,0,1.0626293023427327
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,fp8,0,1.2838986714680989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,fp8,0,0.5975679953893026
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,fp8,0,1.94812806447347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,float16,0,0.5602453152338663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,fp8,0,0.5183413426081339
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,float16,0,0.588101347287496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,float16,0,0.6406613190968832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,fp8,0,0.5505599975585938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,fp8,0,0.5506986776987711
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,float16,0,0.32206400235493976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,float16,0,0.31514666477839154
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,fp8,0,0.2977173328399658
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,fp8,0,0.29713066418965656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,float16,0,1.074170668919881
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,fp8,0,0.2980533242225647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,float16,0,0.30745067199071247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,fp8,0,0.29704533020655316
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,float16,0,0.6390346686045328
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,float16,0,0.19147199392318726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,float16,0,0.19112000862757364
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,fp8,0,0.16960533459981283
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,fp8,0,0.6031573216120402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,fp8,0,0.1702666680018107
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,float16,0,0.19353600343068442
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,float16,0,0.18397865692774454
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,fp8,0,0.17081065972646078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,fp8,0,0.1562986671924591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,float16,0,0.17098132769266763
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,fp8,0,0.2813813289006551
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,float16,0,0.2983466585477193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,float16,0,2.493354638417562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,fp8,0,2.467456022898356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,float16,0,2.9240907033284507
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,fp8,0,2.3508639335632324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,float16,0,2.4996213912963867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,fp8,0,0.15979199608167013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,fp8,0,2.498117287953695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,float16,0,2.6534506479899087
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,fp8,0,2.718245188395182
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,float16,0,1.2871840000152588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,float16,0,1.4965920448303223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,fp8,0,1.1997013092041016
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,fp8,0,1.2641279697418213
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,float16,0,1.5015840530395508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,fp8,0,1.320207993189494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,fp8,0,1.2644906838734944
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,float16,0,1.5220905939737956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,float16,0,1.2675519784291585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,float16,0,0.6884586811065674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,fp8,0,1.1903786659240723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,fp8,0,0.647765318552653
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,float16,0,0.7360533078511556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,float16,0,0.7539680004119873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,fp8,0,0.6106400092442831
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,float16,0,0.6497386693954468
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,fp8,0,0.6109386682510376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,float16,0,0.7124160130818685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,fp8,0,0.6487679878870646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,float16,0,0.5930933157602946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,float16,0,0.38978131612141925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,fp8,0,0.3424373467763265
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,fp8,0,0.32472532987594604
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,fp8,0,0.34414398670196533
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,float16,0,0.31910934050877887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,fp8,0,0.34275734424591064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,float16,0,0.3838719924290975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,fp8,0,0.34113601843516034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,float16,0,0.20521599054336548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,fp8,0,0.17468800147374472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,float16,0,0.20752533276875815
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,float16,0,0.2039146622021993
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,fp8,0,0.6146186590194702
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,fp8,0,0.1884373426437378
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,float16,0,0.2035199999809265
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,fp8,0,0.18916799624760947
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,fp8,0,0.18810133139292398
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,float16,0,0.19010667006174722
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,float16,0,0.12225066622098286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,fp8,0,0.11095999677975972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,float16,0,0.3505386511484782
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,float16,0,0.12197333574295044
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,float16,0,0.3887999852498372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,fp8,0,0.10281067093213399
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,fp8,0,0.11173333724339803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,float16,0,0.11157332857449849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,float16,0,0.12229866782824199
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,fp8,0,0.10345066587130229
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,fp8,0,0.18982932964960733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,float16,0,2.9076159795125327
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,float16,0,2.6170667012532554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,fp8,0,2.388144016265869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,float16,0,2.6077173550923667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,fp8,0,0.10339200496673584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,float16,0,0.3644853432973226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,fp8,0,2.587765375773112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,float16,0,2.5327626864115396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,fp8,0,2.3901440302530923
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,float16,0,1.433008035024007
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,float16,0,1.3999892870585124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,fp8,0,1.2176799774169922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,fp8,0,1.2784960269927979
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,fp8,0,2.385258674621582
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,float16,0,1.2737706502278645
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,float16,0,1.4785173734029133
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,fp8,0,1.2175412972768147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,fp8,0,1.272063970565796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,float16,0,1.2691573301951091
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,fp8,0,1.2047573725382488
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,fp8,0,0.6501226822535197
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,float16,0,0.12150933345158894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,float16,0,0.6632266839345297
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,float16,0,0.7424853642781576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,fp8,0,0.6209226846694946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,fp8,0,0.6482933362325033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,float16,0,0.6849599679311117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,fp8,0,0.6502399841944376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,float16,0,0.6497546831766764
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,float16,0,0.3373013337453206
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,fp8,0,0.6146453221638998
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,float16,0,0.3381173213322957
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,fp8,0,0.34225066502888996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,fp8,0,0.3195146719614665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,float16,0,0.34377598762512207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,float16,0,0.34055999914805096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,float16,0,0.34515734513600665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,fp8,0,0.3376479943593343
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,fp8,0,0.1844159960746765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,float16,0,0.19669866561889648
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,float16,0,0.2032159964243571
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,fp8,0,0.18197333812713623
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,fp8,0,0.1736746629079183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,float16,0,0.1973173419634501
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,fp8,0,0.1831573247909546
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,fp8,0,0.17267733812332153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,float16,0,0.18437333901723227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,float16,0,0.744213342666626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,float16,0,0.10687466462453206
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,float16,0,0.11545600493748982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,fp8,0,0.10487467050552368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,fp8,0,0.10283733407656352
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,fp8,0,0.10345600048700969
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,float16,0,0.10526399811108907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,fp8,0,0.10217066605885823
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,fp8,0,0.3379253149032593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,float16,0,0.06405866642793019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,fp8,0,0.3226133386294047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,fp8,0,0.05987200140953064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,fp8,0,0.06297599772612254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,float16,0,0.0681279997030894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,fp8,0,0.06007466713587443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,float16,0,0.06467733283837636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,float16,0,0.2004693349202474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,fp8,0,0.06467733283837636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,fp8,0,0.059152002135912575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,float16,0,0.06426666676998138
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,float16,0,1.6613814036051433
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,fp8,0,0.10474666953086853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,fp8,0,1.5901333491007488
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,float16,0,1.5901974042256672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,float16,0,0.1109920044740041
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,fp8,0,1.590010643005371
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,float16,0,1.5926987330118816
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,fp8,0,1.5935680071512859
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,float16,0,1.5964746475219727
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,fp8,0,1.5560107231140137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,float16,0,0.8825493653615316
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,fp8,0,0.7784159978230795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,fp8,0,0.8066186904907227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,float16,0,0.8951306343078613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,float16,0,0.8247520128885905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,fp8,0,0.8071786562601725
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,float16,0,0.8066453138987223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,fp8,0,0.807258685429891
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,float16,0,0.1151146690050761
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,float16,0,0.8096746603647867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,float16,0,0.41363199551900226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,fp8,0,0.8060586452484131
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,fp8,0,0.4153493245442708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,float16,0,0.4264693260192871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,fp8,0,0.3989280064900716
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,fp8,0,0.4158613284428914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,float16,0,0.4147253433863322
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,fp8,0,0.4153759876887004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,float16,0,0.41675734519958496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,fp8,0,0.41547731558481854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,fp8,0,0.21850667397181192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,float16,0,0.23144533236821493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,float16,0,0.23767467339833578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,float16,0,0.06825600067774455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,fp8,0,0.21876800060272217
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,float16,0,0.21969600518544516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,fp8,0,0.21876267592112222
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,float16,0,0.23386132717132568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,fp8,0,0.21852266788482666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,fp8,0,0.2108959952990214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,float16,0,0.2217866579691569
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,float16,0,0.13150933384895325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,fp8,0,0.12203733126322429
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,fp8,0,0.11708266536394756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,float16,0,0.1290666659673055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,fp8,0,0.12326400478680928
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,fp8,0,0.1145919958750407
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,float16,0,0.12186132868131001
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,float16,0,0.07557866473992665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,fp8,0,0.07120533287525177
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,float16,0,0.07520000139872234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,float16,0,0.4587999979654948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,fp8,0,0.07114666700363159
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,float16,0,0.07438399891058604
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,float16,0,0.07100800176461537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,fp8,0,0.07183466851711273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,float16,0,0.07105599840482076
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,fp8,0,0.06629333396752675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,float16,0,0.050698667764663696
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,fp8,0,0.04785599807898203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,float16,0,0.05044800043106079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,float16,0,0.04783466458320618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,fp8,0,0.04690133531888326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,float16,0,0.04993066688378652
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,fp8,0,0.043765331308046974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,float16,0,0.05171733101209005
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,fp8,0,0.045925334095954895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,float16,0,0.13009066383043924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,fp8,0,0.12020267049471538
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,float16,0,1.7074294090270996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,fp8,0,1.7142240206400554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,float16,0,1.80187193552653
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,fp8,0,0.06857599814732869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,fp8,0,1.7163039843241374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,fp8,0,0.043749332427978516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,float16,0,1.7685759862263997
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,fp8,0,1.649397373199463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,float16,0,1.7180426915486653
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,float16,0,0.8842453161875407
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,float16,0,0.8629279931386312
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,fp8,0,1.7146186828613281
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,float16,0,0.12401066223780315
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,fp8,0,0.8336586952209473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,fp8,0,0.8420373598734537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,float16,0,0.8675093650817871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,fp8,0,0.8330453236897787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,fp8,0,0.8656799793243408
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,float16,0,0.8636533419291178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,float16,0,0.931984027226766
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,fp8,0,0.8352853457132975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,float16,0,0.4685973326365153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,float16,0,0.46648534138997394
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,fp8,0,0.4299573500951131
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,fp8,0,0.4254026810328166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,fp8,0,0.44411734739939374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,float16,0,0.4512853225072225
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,float16,0,0.44306135177612305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,fp8,0,0.44234132766723633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,float16,0,0.23011734088261923
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,float16,0,0.251306672890981
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,fp8,0,0.23109867175420126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,fp8,0,0.2237280011177063
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,fp8,0,0.22125333547592163
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,float16,0,0.2587786714235942
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,float16,0,0.24871466557184854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,fp8,0,0.23255467414855957
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,float16,0,0.23220799366633096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,float16,0,0.12702932953834534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,fp8,0,0.12418666481971741
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,float16,0,0.14100799957911173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,float16,0,0.12594667077064514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,fp8,0,0.12136000394821167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,float16,0,0.13521599769592285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,float16,0,0.1257866621017456
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,fp8,0,0.12599999705950418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,float16,0,0.47514665126800537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,fp8,0,0.07214933137098949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,float16,0,0.08066666622956593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,fp8,0,0.07086933155854543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,float16,0,0.07789866626262665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,float16,0,0.07723199824492137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,fp8,0,0.07148266832033794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,float16,0,0.07183466851711273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,fp8,0,0.07047999898592631
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,fp8,0,0.2230506738026937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,float16,0,0.07251200079917908
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,fp8,0,0.06775466601053874
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,float16,0,0.04622933268547058
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,float16,0,0.04351999859015147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,fp8,0,0.043354665239652
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,fp8,0,0.040549332896868386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,fp8,0,0.12469866871833801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,float16,0,0.04338666796684265
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,fp8,0,0.04213866591453552
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,float16,0,0.043935999274253845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,fp8,0,0.12061333656311035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,fp8,0,0.02957333376010259
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,float16,0,0.03164800008138021
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,float16,0,0.031317333380381264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,fp8,0,0.02977599948644638
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,fp8,0,0.4421120087305705
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,fp8,0,0.029258665939172108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,float16,0,0.031231999397277832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,fp8,0,0.03139200061559677
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,fp8,0,0.02922133356332779
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,float16,0,0.03162666658560435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,float16,0,1.3028480211893718
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,fp8,0,1.2787946859995525
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,float16,0,0.04353066782156626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,float16,0,1.3308746814727783
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,fp8,0,0.04162133236726125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,fp8,0,1.3075146675109863
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,float16,0,0.03156800071398417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,float16,0,1.3058453400929768
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,float16,0,0.657807985941569
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,float16,0,0.6715413729349772
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,fp8,0,1.2772213617960613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,float16,0,0.6769599914550781
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,fp8,0,0.6613973379135132
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,fp8,0,0.04120533416668574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,fp8,0,0.6598879893620809
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,fp8,0,1.3056533336639404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,float16,0,1.3065866629282634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,float16,0,0.6764799753824869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,fp8,0,0.6593653361002604
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,float16,0,0.6784586906433105
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,float16,0,0.34464001655578613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,fp8,0,0.338373343149821
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,float16,0,0.3556906779607137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,fp8,0,0.6484053134918213
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,fp8,0,0.3382720152537028
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,float16,0,0.3513760169347127
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,float16,0,0.33803733189900714
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,fp8,0,0.3385386864344279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,float16,0,0.35573331514994305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,fp8,0,0.33834131558736164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,float16,0,0.18662933508555093
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,fp8,0,0.17342400550842285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,fp8,0,0.33074132601420086
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,float16,0,0.18089600404103598
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,fp8,0,0.17285333077112833
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,float16,0,0.18901334206263223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,float16,0,0.18609599272410074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,fp8,0,0.1765120029449463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,fp8,0,0.17223999897638956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,float16,0,0.17770665884017944
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,float16,0,0.10287466645240784
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,fp8,0,0.09290666381518047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,fp8,0,0.6457653443018595
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,fp8,0,0.09285333752632141
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,float16,0,0.1030506690343221
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,fp8,0,0.09681600332260132
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,fp8,0,0.09283199906349182
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,float16,0,0.09729599952697754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,float16,0,0.059152002135912575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,fp8,0,0.05598400036493937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,fp8,0,0.05345066885153452
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,float16,0,0.0581279993057251
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,float16,0,0.05505066613356272
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,fp8,0,0.05539200206597646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,fp8,0,0.05251200000445048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,float16,0,0.055776000022888184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,fp8,0,0.051669334371884666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,fp8,0,0.1766293247540792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,float16,0,0.035205334424972534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,float16,0,0.09619733691215515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,float16,0,0.10762666662534077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,float16,0,0.0347626656293869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,float16,0,0.03591466695070267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,fp8,0,0.0342399999499321
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,fp8,0,0.03313066562016805
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,float16,0,0.03549333413441976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,float16,0,0.058373332023620605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,fp8,0,0.033200000723203026
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,float16,0,0.02714666724205017
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,fp8,0,0.025386666258176167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,fp8,0,0.025349333882331848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,float16,0,0.02699200063943863
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,fp8,0,0.026565333207448322
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,float16,0,0.02521066615978877
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,fp8,0,0.025029333929220837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,float16,0,0.025349333882331848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,fp8,0,0.024911999702453613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,float16,0,0.01887999971707662
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,fp8,0,0.033173332611719765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,fp8,0,0.016976000120242436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,fp8,0,0.017103999853134155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,float16,0,0.018805333723624546
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,float16,0,0.017887999614079792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,float16,0,0.03555733213822047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,float16,0,0.017504000415404636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,fp8,0,0.017162666966517765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,float16,0,0.01709866647919019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,fp8,0,0.01714133347074191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,float16,0,0.5554666519165039
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,fp8,0,0.5448266665140787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,float16,0,0.5454506476720175
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,fp8,0,0.5437973340352377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,float16,0,0.5532480080922445
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,float16,0,0.02720533311367035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,fp8,0,0.549397349357605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,float16,0,0.2846133311589559
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,fp8,0,0.5468426545461019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,float16,0,0.5455466508865356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,float16,0,0.2839626669883728
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,fp8,0,0.2809973359107971
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,fp8,0,0.2802186608314514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,fp8,0,0.2767946720123291
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,float16,0,0.28674666086832684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,float16,0,0.2792373299598694
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,fp8,0,0.27965333064397174
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,float16,0,0.15445866187413534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,fp8,0,0.1483680009841919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,fp8,0,0.1490079959233602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,float16,0,0.15357866883277893
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,float16,0,0.2823413411776225
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,fp8,0,0.27722134192784625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,fp8,0,0.14686933159828186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,float16,0,0.15120533108711243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,fp8,0,0.14813333749771118
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,float16,0,0.15568533539772034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,fp8,0,0.1469119985898336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,float16,0,0.08264000217119853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,fp8,0,0.07843199868996938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,float16,0,0.08090133468310039
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,fp8,0,0.07832533121109009
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,float16,0,0.08059733112653096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,float16,0,0.08064533273379008
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,float16,0,0.07886933286984761
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,fp8,0,0.07843733330567677
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,float16,0,0.046298667788505554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,fp8,0,0.04586133360862732
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,float16,0,0.0495413343111674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,fp8,0,0.045834665497144066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,float16,0,0.04614399870236715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,fp8,0,0.09372267127037048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,float16,0,0.047541335225105286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,fp8,0,0.04571199913819631
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,float16,0,0.04769066472848257
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,fp8,0,0.04554666578769684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,float16,0,0.03129599988460541
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,float16,0,0.030837332208951313
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,fp8,0,0.02956799914439519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,fp8,0,0.07887466748555501
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,fp8,0,0.030618667602539062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,float16,0,0.03146133323510488
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,fp8,0,0.030234667162100475
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,fp8,0,0.07930133243401845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,float16,0,0.031178665657838184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,fp8,0,0.029680001238981884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,float16,0,0.023141334454218548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,float16,0,0.15038933356602988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,fp8,0,0.02170666555563609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,fp8,0,0.03350399931271871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,float16,0,0.02236266682545344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,float16,0,0.022005334496498108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,fp8,0,0.045642669002215065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,fp8,0,0.02111999938885371
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,float16,0,0.021359999974568684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,fp8,0,0.021269333859284718
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,fp8,0,0.02978666623433431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,fp8,0,0.01482133318980535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,float16,0,0.015157333264748255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,fp8,0,0.014943999548753103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,float16,0,0.031194667021433514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,float16,0,0.015066667149464289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,fp8,0,0.01471466695268949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,float16,0,0.015130666395028433
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,float16,0,0.014959999670584997
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,fp8,0,0.014869333555301031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,float16,0,0.014837333311637243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,fp8,0,0.014730667074521383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,float16,0,0.014778666198253632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,fp8,0,0.014720000326633453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,fp8,0,0.0220320001244545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,fp8,0,0.014901333798964819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,float16,0,0.014778666198253632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,float16,0,0.015066667149464289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,float16,0,0.02242133269707362
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,fp8,0,0.014783999572197596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,float16,0,0.015125333021084467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,float16,0,0.3417760133743286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,fp8,0,0.3362133502960205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,float16,0,0.338645339012146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,fp8,0,0.336298664410909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,fp8,0,0.014815999815861383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,float16,0,0.338703989982605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,fp8,0,0.3362079858779907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,float16,0,0.339685320854187
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,fp8,0,0.33460267384847003
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,fp8,0,0.17270932594935098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,float16,0,0.17562133073806763
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,fp8,0,0.02124800036350886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,float16,0,0.01482133318980535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,float16,0,0.17763733863830566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,fp8,0,0.1744640072186788
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,float16,0,0.17750932772954306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,float16,0,0.1784693400065104
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,fp8,0,0.17412267128626505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,fp8,0,0.17246399323145548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,float16,0,0.17478400468826294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,fp8,0,0.09396800398826599
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,float16,0,0.09669867157936096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,float16,0,0.09494400024414062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,fp8,0,0.09287466605504353
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,fp8,0,0.09252799550692241
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,float16,0,0.09385599692662557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,fp8,0,0.09261866410573323
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,float16,0,0.05179733534653982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,float16,0,0.052015999952952065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,float16,0,0.05053866902987162
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,fp8,0,0.04994133114814758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,float16,0,0.04990933338801066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,fp8,0,0.04966400067011515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,float16,0,0.05186666548252106
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,fp8,0,0.17320533593495688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,fp8,0,0.049957334995269775
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,float16,0,0.0313226655125618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,fp8,0,0.029701332251230877
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,float16,0,0.09532266855239868
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,float16,0,0.09520000219345093
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,float16,0,0.031141333281993866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,float16,0,0.03121600051720937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,fp8,0,0.029317334294319153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,fp8,0,0.02957333376010259
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,float16,0,0.031221332649389904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,fp8,0,0.029509333272775013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,float16,0,0.02951466788848241
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,fp8,0,0.02934933453798294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,float16,0,0.021205333371957142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,float16,0,0.020917333662509918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,fp8,0,0.02025066688656807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,fp8,0,0.05177066723505656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,fp8,0,0.02062400057911873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,fp8,0,0.020096000283956528
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,fp8,0,0.02029866725206375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,float16,0,0.02086399992307027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,fp8,0,0.019882666567961376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,float16,0,0.015103999525308609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,fp8,0,0.014837333311637243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,float16,0,0.014975999792416891
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,float16,0,0.015141333142916361
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,fp8,0,0.0922933320204417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,float16,0,0.011034666250149408
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,float16,0,0.010847999403874079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,fp8,0,0.01099733387430509
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,float16,0,0.01109333336353302
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,fp8,0,0.0107893335322539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,fp8,0,0.04993600149949392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,float16,0,0.020879998803138733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,float16,0,0.010821333775917688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,float16,0,0.01492799942692121
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,float16,0,0.010698666175206503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,fp8,0,0.010703999549150467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,float16,0,0.01099733387430509
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,float16,0,0.011034666250149408
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,fp8,0,0.0107893335322539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,float16,0,0.010746666540702185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,fp8,0,0.010890666395425797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,float16,0,0.010954666882753372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,fp8,0,0.010746666540702185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,float16,0,0.015429332852363586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,float16,0,0.01109333336353302
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,float16,0,0.2686186631520589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,fp8,0,0.2587520082791646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,fp8,0,0.25918932755788165
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,fp8,0,0.010714666297038397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,float16,0,0.020879998803138733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,float16,0,0.263589342435201
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,float16,0,0.262938658396403
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,float16,0,0.1387999951839447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,fp8,0,0.2588160037994385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,fp8,0,0.2586826682090759
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,float16,0,0.13925333817799887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,fp8,0,0.13645866513252258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,fp8,0,0.1339413324991862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,fp8,0,0.1338986655076345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,float16,0,0.143477330605189
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,float16,0,0.1390773355960846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,fp8,0,0.13398399949073792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,fp8,0,0.13572266697883606
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,float16,0,0.07503466804822286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,fp8,0,0.07241599758466084
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,float16,0,0.14131200313568115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,float16,0,0.07646933197975159
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,float16,0,0.07656000057856242
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,fp8,0,0.07367466886838277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,fp8,0,0.07203733424345653
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,float16,0,0.07737599809964497
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,fp8,0,0.03967999915281931
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,fp8,0,0.0735999991496404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,float16,0,0.04001600046952566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,float16,0,0.041706666350364685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,float16,0,0.04142933338880539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,fp8,0,0.03950933367013931
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,fp8,0,0.0397173340121905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,float16,0,0.03965333352486292
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,float16,0,0.0413973331451416
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,fp8,0,0.03967999915281931
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,fp8,0,0.03979733337958654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,float16,0,0.025360000630219776
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,float16,0,0.02535466601451238
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,fp8,0,0.025125332176685333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,fp8,0,0.025146665672461193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,float16,0,0.025274666647116344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,fp8,0,0.02478933334350586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,float16,0,0.025146665672461193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,fp8,0,0.02497066557407379
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,float16,0,0.02502399931351344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,float16,0,0.01709866647919019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,fp8,0,0.01720533271630605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,float16,0,0.26151466369628906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,float16,0,0.01695999999841054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,fp8,0,0.017103999853134155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,fp8,0,0.016976000120242436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,fp8,0,0.01695466662446658
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,float16,0,0.01714133347074191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,fp8,0,0.016917333006858826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,float16,0,0.017173333714405697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,float16,0,0.01714133347074191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,float16,0,0.013061333447694778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,float16,0,0.013125333935022354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,float16,0,0.013034666577974955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,float16,0,0.013914667069911957
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,float16,0,0.01320533330241839
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,fp8,0,0.01081066702802976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,float16,0,0.0107893335322539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,fp8,0,0.010656000425418219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,fp8,0,0.024986666937669117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,fp8,0,0.010565333068370819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,fp8,0,0.07218666871388753
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,float16,0,0.010618666807810465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,fp8,0,0.010816000401973724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,float16,0,0.010682666053374609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,fp8,0,0.010746666540702185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,fp8,0,0.012773333738247553
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,float16,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,fp8,0,0.010559999694426855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,float16,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,float16,0,0.010687999427318573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,float16,0,0.010751999914646149
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,float16,0,0.010570666442314783
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,float16,0,0.23381332556406656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,fp8,0,0.2233546574910482
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,float16,0,0.0740533322095871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,float16,0,0.23089067141215006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,fp8,0,0.22396800915400186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,float16,0,0.22766399383544922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,fp8,0,0.22178133328755698
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,fp8,0,0.22447466850280762
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,float16,0,0.22763733069101968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,float16,0,0.009882666791478792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,fp8,0,0.010069333637754122
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,float16,0,0.1204853355884552
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,fp8,0,0.11749866604804993
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,float16,0,0.1202133297920227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,fp8,0,0.11611200372378032
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,fp8,0,0.11541866262753804
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,float16,0,0.12056533495585124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,float16,0,0.12123733758926392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,fp8,0,0.11512533823649089
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,float16,0,0.06413333117961884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,float16,0,0.12038399775822957
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,fp8,0,0.06279466549555461
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,fp8,0,0.06215466558933258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,float16,0,0.06534933547178905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,fp8,0,0.062261333068211876
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,float16,0,0.06312533219655354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,fp8,0,0.0620000014702479
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,float16,0,0.03651199986537298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,float16,0,0.036602665980656944
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,fp8,0,0.03523733218510946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,float16,0,0.03551466763019562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,fp8,0,0.033546666304270424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,fp8,0,0.035605333745479584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,float16,0,0.03545066714286804
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,fp8,0,0.03345600018898646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,float16,0,0.023232000569502514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,fp8,0,0.021349333226680756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,float16,0,0.065610667069753
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,fp8,0,0.11745599905649821
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,fp8,0,0.02290133386850357
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,float16,0,0.023013333479563396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,fp8,0,0.021173333128293354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,fp8,0,0.021269333859284718
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,float16,0,0.023034666975339253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,fp8,0,0.021002667645613354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,fp8,0,0.03554133325815201
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,float16,0,0.015392000476519266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,float16,0,0.03703466554482778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,fp8,0,0.015146666516860327
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,float16,0,0.016794666647911072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,float16,0,0.016832000265518825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,fp8,0,0.01515199989080429
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,float16,0,0.01651200031240781
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,float16,0,0.015493333339691162
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,float16,0,0.0643093337615331
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,float16,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,float16,0,0.023056000471115112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,fp8,0,0.062218666076660156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,fp8,0,0.012629333883523941
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,float16,0,0.01293333371480306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,float16,0,0.023039999107519787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,float16,0,0.012730666746695837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,float16,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,fp8,0,0.012655999511480331
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,fp8,0,0.009317333499590555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,float16,0,0.010117333382368088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,float16,0,0.009242666885256767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,float16,0,0.010399999717871347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,float16,0,0.010357333347201347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,fp8,0,0.009328000247478485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,fp8,0,0.014896000425020853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,fp8,0,0.015216000378131866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,float16,0,0.009093333035707474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,float16,0,0.009130666653315226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,float16,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,float16,0,0.2238933245340983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,float16,0,0.013066666821638743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,fp8,0,0.2104640007019043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,float16,0,0.22297066450119019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,float16,0,0.22224533557891846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,fp8,0,0.2060906688372294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,fp8,0,0.2072640061378479
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,float16,0,0.22096532583236694
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,fp8,0,0.10734400153160095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,float16,0,0.1172320048014323
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,fp8,0,0.1086293359597524
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,fp8,0,0.11044266819953918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,float16,0,0.11762666702270508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,float16,0,0.11675199866294861
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,fp8,0,0.10774933298428853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,fp8,0,0.1099626620610555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,fp8,0,0.20743467410405478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,float16,0,0.06417066852251689
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,fp8,0,0.058143998185793556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,float16,0,0.06573333342870076
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,float16,0,0.0672159989674886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,fp8,0,0.058602665861447654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,fp8,0,0.05815466741720835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,float16,0,0.06328533093134563
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,float16,0,0.06206933160622915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,fp8,0,0.05889600018660227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,fp8,0,0.05955199897289276
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,float16,0,0.1158026655515035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,fp8,0,0.033344000577926636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,float16,0,0.03591466695070267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,fp8,0,0.03346133232116699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,float16,0,0.11830400427182515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,float16,0,0.03530666728814443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,fp8,0,0.03349333256483078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,float16,0,0.023285334308942158
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,fp8,0,0.02088533341884613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,fp8,0,0.02124800036350886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,float16,0,0.02290133386850357
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,float16,0,0.021615999440352123
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,fp8,0,0.021141332884629566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,float16,0,0.02186666677395503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,fp8,0,0.021173333128293354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,float16,0,0.03600533306598663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,float16,0,0.015018666783968607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,fp8,0,0.015247999380032221
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,fp8,0,0.033333333830038704
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,fp8,0,0.01488000030318896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,fp8,0,0.014869333555301031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,float16,0,0.015034666905800501
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,fp8,0,0.014853333433469137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,float16,0,0.015072000523408255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,fp8,0,0.015013333410024643
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,float16,0,0.023050665855407715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,float16,0,0.012639999389648438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,fp8,0,0.011717333147923151
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,float16,0,0.012815999488035837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,fp8,0,0.011359999577204386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,float16,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,float16,0,0.014837333311637243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,float16,0,0.037263999382654824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,float16,0,0.012784000486135483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,fp8,0,0.0332640012105306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,float16,0,0.009141333401203156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,fp8,0,0.009103999783595404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,fp8,0,0.009162666896979014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,fp8,0,0.01163200040658315
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,fp8,0,0.009082666908701261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,float16,0,0.008821333448092142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,float16,0,0.008687999720374743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,fp8,0,0.008581333483258883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,float16,0,0.03595733394225439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,float16,0,0.015119999647140503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,float16,0,0.009109333157539368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,fp8,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,float16,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,float16,0,0.010053333515922228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,float16,0,0.012719999998807907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,1,128,1,float16,float16,0,0.012837332983811697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,1,128,1,float16,fp8,0,0.010821333775917688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,2,128,1,float16,float16,0,0.018842666099468868
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,2,128,1,float16,fp8,0,0.016858667135238647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,0,0.021359999974568684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,0,0.016895999511082966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,0,0.027072000006834667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,0,0.023034666975339253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,1,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,float16,0,0.06399466594060262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,fp8,0,0.047824000318845115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,1,128,1,float16,fp8,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,2,128,1,float16,float16,0,0.013050666699806849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,2,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,0,0.014159999787807465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,0,0.016832000265518825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,1,128,1,float16,float16,0,0.008746666833758354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,1,128,1,float16,fp8,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,2,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,0,0.01071999967098236
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,2,128,1,float16,fp8,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,0,0.011066666493813196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,float16,0,0.008661333471536636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,0,0.010784000158309937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,1,128,1,float16,float16,0,0.008586666857202848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,float16,0,0.019088000059127808
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,1,128,1,float16,fp8,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,fp8,0,0.016970666746298473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,2,128,1,float16,float16,0,0.0084906667470932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,2,128,1,float16,fp8,0,0.008709333216150602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,0,0.008837333569924036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,0,0.007120000198483467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,0,0.008602666358153025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,1,128,1,float16,float16,0,0.008565333361426989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,1,128,1,float16,fp8,0,0.008559999987483025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,float16,0,0.013007999708255133
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,2,128,1,float16,float16,0,0.008874666566650072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,2,128,1,float16,fp8,0,0.008714666590094566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,0,0.008362666393319765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,0,0.006949333474040031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,0,0.007871999715765318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,float16,0,0.03222399950027466
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,0,0.007007999966541926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,1,128,1,float16,float16,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,1,128,1,float16,fp8,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,float16,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,2,128,1,float16,fp8,0,0.008634666601816813
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,0,0.006965333595871925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,0,0.0069386667261521024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,0,0.007061333085099856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,0,0.006821333120266597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,1,128,1,float16,float16,0,0.0086666668454806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,1,128,1,float16,fp8,0,0.007050666958093643
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,2,128,1,float16,float16,0,0.008687999720374743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,2,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,fp8,0,0.008592000231146812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,0,0.0069759997228781385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,0,0.00702400008837382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,0,0.006826666494210561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,0,0.00666133314371109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,1,128,1,float16,float16,0,0.009077333534757296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,0,0.014837333311637243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,1,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,float16,0,0.007285333548982938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,2,128,1,float16,fp8,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,0,0.006927999978264173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,fp8,0,0.027077332139015198
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,0,0.00701333334048589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,0,0.0068853336075941724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,40,2,128,1,float16,float16,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,1,128,1,float16,float16,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,fp8,0,0.00666133314371109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,1,128,1,float16,fp8,0,0.006858666737874349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,float16,0,0.0069759997228781385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,2,128,1,float16,float16,0,0.008661333471536636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,2,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,0,0.00696000022192796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,0,0.006671999891599019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,0,0.0069333333522081375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,0,0.006751999879876773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,float16,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,2,128,1,float16,float16,0,0.008757333581646284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,0,0.0069759997228781385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,fp8,0,9.966970443725586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,float16,0,10.84274164835612
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,fp8,0,0.006693333387374878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,float16,0,10.842149098714193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,fp8,0,10.540197372436523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,fp8,0,9.975296020507812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,float16,0,10.84731674194336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,float16,0,10.846122741699219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,fp8,0,5.369504292805989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,fp8,0,5.0556214650472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,float16,0,5.497189203898112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,float16,0,7.503119786580403
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,fp8,0,5.047194798787435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,float16,0,6.31495475769043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,fp8,0,9.986821492513021
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,float16,0,7.37068239847819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,fp8,0,5.368607838948567
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,float16,0,2.992794672648112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,float16,0,5.493066787719727
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,fp8,0,2.5853120485941568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,fp8,0,2.741135915120443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,fp8,0,5.3663679758707685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,fp8,0,2.745877265930176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,float16,0,3.5759948094685874
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,fp8,0,2.7497708002726235
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,float16,0,2.9407145182291665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,float16,0,2.811386744181315
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,fp8,0,2.742512067159017
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,float16,0,1.4712427457173665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,fp8,0,1.419040044148763
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,fp8,0,1.4340319633483887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,float16,0,2.0187999407450357
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,float16,0,1.762506643931071
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,fp8,0,1.4322986602783203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,float16,0,1.7886239687601726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,fp8,0,1.353376070658366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,float16,0,3.4819361368815103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,fp8,0,5.751610438028972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,fp8,0,6.056751887003581
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,fp8,0,1.433370590209961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,float16,0,8.013162612915039
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,float16,0,6.281152089436849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,float16,0,1.7469654083251953
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,float16,0,3.4037599563598633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,fp8,0,6.331968307495117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,fp8,0,3.1487948099772134
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,float16,0,3.328000068664551
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,float16,0,6.243349075317383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,float16,0,8.674437204996744
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,fp8,0,3.15666135152181
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,fp8,0,2.9180053075154624
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,float16,0,3.343658765157064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,float16,0,4.283354759216309
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,float16,0,1.6463947296142578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,fp8,0,1.5031040509541829
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,fp8,0,3.1088854471842446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,fp8,0,1.8829654057820637
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,fp8,0,6.134885152180989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,fp8,0,1.4994452794392903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,float16,0,2.007311979929606
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,float16,0,1.6416160265604656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,fp8,0,1.7071253458658855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,float16,0,3.549973487854004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,float16,0,1.7461279233296711
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,float16,0,0.9907200336456299
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,fp8,0,0.796832005182902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,float16,0,0.9900320370992025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,float16,0,1.0313866933186848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,fp8,0,3.1107308069864907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,fp8,0,0.8711360295613607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,float16,0,0.9254399935404459
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,float16,0,2.011258602142334
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,fp8,0,0.8490026791890463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,float16,0,4.426565488179524
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,fp8,0,1.5012000401814778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,fp8,0,4.33356253306071
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,float16,0,4.756410598754883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,fp8,0,0.8506879806518555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,fp8,0,0.8217013676961263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,float16,0,0.9238080183664957
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,float16,0,4.413381258646647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,fp8,0,4.052559852600098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,float16,0,2.635455926259359
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,float16,0,2.706495920817057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,fp8,0,2.0714720090230307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,float16,0,5.307813326517741
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,fp8,0,4.320890744527181
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,fp8,0,2.208613395690918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,float16,0,2.395946661631266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,fp8,0,2.596773306528727
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,fp8,0,4.335920015970866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,float16,0,2.5398666063944497
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,fp8,0,2.209557374318441
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,fp8,0,2.069045384724935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,float16,0,2.8266881306966147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,float16,0,1.1697386900583904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,float16,0,1.2905813058217366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,fp8,0,1.1054346561431885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,float16,0,1.291914701461792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,float16,0,1.4264639218648274
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,float16,0,1.1669867038726807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,float16,0,0.6351946592330933
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,float16,0,0.7422400315602621
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,fp8,0,0.6136320034662882
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,fp8,0,0.6154506603876749
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,float16,0,0.7466453711191813
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,float16,0,0.6668746471405029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,fp8,0,0.6136480172475179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,float16,0,0.6652586857477824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,fp8,0,0.6124053398768107
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,fp8,0,1.1455413500467937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,fp8,0,1.1453546682993572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,fp8,0,1.1237280368804932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,fp8,0,0.5806506474812826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,fp8,0,5.6503041585286455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,float16,0,5.715994517008464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,float16,0,6.568000157674153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,fp8,0,5.2701921463012695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,float16,0,5.7215576171875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,fp8,0,5.624799728393555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,fp8,0,2.7665494283040366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,float16,0,5.7198028564453125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,fp8,0,1.3858027458190918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,float16,0,3.587242762247721
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,fp8,0,2.6648213068644204
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,fp8,0,5.512208302815755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,float16,0,3.0233227411905923
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,fp8,0,2.6750612258911133
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,fp8,0,2.853605270385742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,float16,0,3.509466807047526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,fp8,0,2.8619359334309897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,fp8,0,1.3696533838907878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,float16,0,1.4880320231119792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,float16,0,1.584287961324056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,fp8,0,1.4589653015136719
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,fp8,0,1.4994932810465496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,float16,0,2.0605386098225913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,float16,0,1.545365333557129
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,fp8,0,1.3640693028767903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,float16,0,2.9075892766316733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,float16,0,1.4825332959493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,float16,0,0.8288640181223551
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,fp8,0,1.4664907455444336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,fp8,0,0.7641332944234213
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,float16,0,0.9537599881490072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,fp8,0,0.7135252952575684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,float16,0,0.9469652970631918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,fp8,0,0.7640480200449625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,fp8,0,0.7169653574625651
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,float16,0,0.8217919667561849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,float16,0,0.4908906618754069
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,float16,0,0.4280906518300374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,float16,0,3.124693234761556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,fp8,0,0.425439993540446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,fp8,0,0.3908533255259196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,float16,0,0.4646933476130168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,fp8,0,0.4202186663945516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,fp8,0,0.4191253185272217
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,float16,0,0.4256266752878825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,float16,0,0.46541333198547363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,fp8,0,0.38862931728363037
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,float16,0,0.7759466965993246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,fp8,0,3.075221379597982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,fp8,0,3.076826731363932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,float16,0,3.34332275390625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,float16,0,3.340442657470703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,fp8,0,3.0750134785970054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,float16,0,1.696730613708496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,fp8,0,1.5618720054626465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,fp8,0,1.5705599784851074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,float16,0,3.3437493642171225
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,fp8,0,3.0773067474365234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,float16,0,1.6967946688334148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,fp8,0,1.5618774096171062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,float16,0,1.8209865887959797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,fp8,0,1.5618400573730469
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,float16,0,1.6992586453755696
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,float16,0,3.5363734563191733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,fp8,0,1.5634133021036785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,float16,0,0.8753706614176432
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,fp8,0,0.8648959795633951
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,fp8,0,0.86516801516215
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,fp8,0,0.8630346457163492
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,float16,0,1.0687359968821208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,float16,0,0.8766559759775797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,fp8,0,0.8643306891123453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,float16,0,0.9428426424662272
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,fp8,0,0.8067946434020996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,float16,0,0.46588265895843506
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,float16,0,0.47093868255615234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,float16,0,1.7122774124145508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,fp8,0,0.46352001031239826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,fp8,0,0.42858131726582843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,float16,0,0.5030346711476644
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,fp8,0,0.43009066581726074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,float16,0,0.4710293213526408
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,float16,0,0.2971839904785156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,float16,0,0.26473067204157513
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,fp8,0,0.2574506600697835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,fp8,0,0.7161546548207601
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,float16,0,0.883407990137736
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,fp8,0,0.2592159907023112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,float16,0,0.2853813370068868
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,float16,0,0.2632693250974019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,fp8,0,0.2587413390477498
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,fp8,0,0.2584693431854248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,float16,0,0.2636373241742452
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,fp8,0,0.4306453466415405
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,float16,0,0.465338667233785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,float16,0,3.1525227228800454
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,fp8,0,0.4596533377965291
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,float16,0,3.1515413920084634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,fp8,0,2.9042186737060547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,fp8,0,2.907888094584147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,fp8,0,0.24103466669718424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,float16,0,3.1525227228800454
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,fp8,0,2.9065119425455728
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,float16,0,1.988906701405843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,float16,0,1.6248745918273926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,fp8,0,1.4817546208699544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,float16,0,3.1624959309895835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,fp8,0,1.4770347277323406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,fp8,0,2.9109118779500327
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,fp8,0,1.574677308400472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,float16,0,1.9442346890767415
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,float16,0,1.8755839665730794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,fp8,0,0.8058186372121176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,fp8,0,1.4729119936625164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,float16,0,1.5996373494466145
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,fp8,0,0.7588106791178385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,float16,0,0.9827626546223959
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,fp8,0,0.8808053334554037
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,float16,0,0.8469173113505045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,float16,0,0.8209119637807211
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,fp8,0,0.8674986362457275
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,float16,0,0.45154666900634766
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,float16,0,0.44401601950327557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,fp8,0,0.3994239966074626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,float16,0,0.43778133392333984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,fp8,0,0.4309653441111247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,fp8,0,0.42534399032592773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,float16,0,0.4337013165156047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,fp8,0,0.39766931533813477
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,float16,0,0.43143999576568604
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,fp8,0,0.3974986473719279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,float16,0,0.8731199900309244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,fp8,0,1.5736907323201497
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,float16,0,0.23773866891860962
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,fp8,0,0.2366559902826945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,float16,0,0.2704799969991048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,fp8,0,0.21955732504526773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,fp8,0,0.2161759932835897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,float16,0,0.23667732874552408
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,fp8,0,0.8064106305440267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,float16,0,0.2376799980799357
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,fp8,0,0.21780800819396973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,float16,0,0.15051199992497763
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,fp8,0,0.13672000169754028
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,float16,0,0.142085333665212
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,fp8,0,0.13621866703033447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,fp8,0,0.12570133805274963
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,float16,0,0.13924800356229147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,fp8,0,0.12705600261688232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,float16,0,1.8894880612691243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,fp8,0,1.7449439366658528
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,float16,0,0.24611733357111612
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,float16,0,0.8329226970672607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,fp8,0,0.23219732443491617
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,float16,0,2.3509546915690103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,float16,0,1.8913386662801106
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,fp8,0,1.7457547187805176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,float16,0,0.13831999897956848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,float16,0,2.01472536722819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,fp8,0,0.8945759932200114
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,float16,0,0.9791519641876221
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,float16,0,0.9596319993336996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,fp8,0,0.9654719829559326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,fp8,0,0.8860479990641276
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,fp8,0,0.9422346750895182
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,float16,0,0.9643627007802328
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,fp8,0,0.9500106970469157
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,fp8,0,1.74401060740153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,float16,0,0.1532693306605021
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,float16,0,0.5883893171946207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,float16,0,0.5065866708755493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,fp8,0,0.46220799287160236
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,float16,0,0.5890560150146484
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,fp8,0,1.9835039774576824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,float16,0,0.5299093325932821
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,fp8,0,0.4962720076243083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,fp8,0,0.48931201299031574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,fp8,0,0.48821866512298584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,float16,0,0.5024853150049845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,float16,0,1.1421013673146565
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,float16,0,0.28886934121449787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,float16,0,0.27084799607594806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,float16,0,0.30085867643356323
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,fp8,0,0.24446400006612143
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,float16,0,0.26498667399088544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,fp8,0,0.2632533311843872
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,float16,0,0.2672906716664632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,fp8,0,0.24448533852895102
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,fp8,0,0.12769066294034323
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,fp8,0,0.14784533778826395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,float16,0,0.16524266203244528
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,fp8,0,0.4880266586939494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,float16,0,0.16502933700879416
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,float16,0,0.16289599736531576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,fp8,0,0.13962666193644205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,float16,0,0.16524266203244528
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,fp8,0,0.14939733346303305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,float16,0,0.16225066781044006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,fp8,0,0.13591466347376505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,float16,0,0.09125333031018575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,fp8,0,0.08392533659934998
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,float16,0,0.09092266360918681
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,float16,0,0.09914666414260864
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,fp8,0,0.08262933293978374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,float16,0,0.09012266993522644
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,fp8,0,0.08941866954167683
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,fp8,0,0.08267733454704285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,float16,0,0.09098133444786072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,float16,0,0.9592266877492269
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,fp8,0,0.24789865811665854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,float16,0,1.865023930867513
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,fp8,0,1.8238399823506672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,float16,0,1.8700373967488606
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,float16,0,1.8671627044677734
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,fp8,0,1.7255573272705078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,fp8,0,0.14826666315396628
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,float16,0,0.9441973368326823
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,fp8,0,0.08316799998283386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,float16,0,1.874128023783366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,fp8,0,1.7306666374206543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,fp8,0,0.8847466309865316
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,float16,0,0.9684853553771973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,fp8,0,0.8735199769337972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,fp8,0,0.26343466838200885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,float16,0,1.1034293174743652
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,fp8,0,0.874288002649943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,float16,0,0.947322686513265
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,fp8,0,0.9266453584035238
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,float16,0,0.9492800235748291
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,fp8,0,0.8767200311024984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,float16,0,0.5632160107294718
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,fp8,0,0.47575998306274414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,fp8,0,0.4541066487630208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,fp8,0,0.4763520161310832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,float16,0,0.5681706666946411
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,fp8,0,0.4756693442662557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,float16,0,0.4880640109380086
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,fp8,0,0.45049067338307697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,fp8,0,1.8323040008544922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,float16,0,0.29148266712824505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,fp8,0,0.2400533358256022
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,float16,0,0.2626240054766337
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,fp8,0,0.2530933419863383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,float16,0,0.2916799982388814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,fp8,0,0.23586666584014893
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,float16,0,0.29078932603200275
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,fp8,0,0.2529866695404053
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,float16,0,0.25702399015426636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,float16,0,0.15715733170509338
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,fp8,0,0.23733866214752197
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,fp8,0,0.12941333651542664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,fp8,0,0.14173332850138345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,float16,0,0.1611253321170807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,float16,0,0.14588800072669983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,fp8,0,0.12954666217168173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,float16,0,0.14409599701563516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,fp8,0,0.12979732950528464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,float16,0,0.09089066584904988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,fp8,0,0.08138133088747661
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,float16,0,0.08887466788291931
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,float16,0,0.4976053237915039
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,float16,0,0.08921600381533305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,fp8,0,0.08070399860541026
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,float16,0,0.0830506682395935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,float16,0,0.09030399719874065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,fp8,0,0.08197866876920064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,fp8,0,0.0758133331934611
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,float16,0,0.0588266650835673
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,fp8,0,0.05300266544024149
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,float16,0,0.05503466725349426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,float16,0,0.05933333436648051
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,fp8,0,0.05167999863624573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,fp8,0,0.049914668003718056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,float16,0,0.053717335065205894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,float16,0,0.05400000015894572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,fp8,0,0.0532533327738444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,float16,0,1.3733867009480794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,float16,0,0.14260799686113992
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,fp8,0,0.13195199767748514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,float16,0,1.2235999902089436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,fp8,0,0.07689600189526875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,fp8,0,1.1375412940979004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,float16,0,0.5642613172531128
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,float16,0,1.1702293554941814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,fp8,0,1.0818986892700195
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,fp8,0,0.5559733311335245
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,float16,0,0.6892800331115723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,float16,0,0.6089066664377848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,fp8,0,0.5782826741536459
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,float16,0,0.6861226558685303
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,float16,0,0.5914400021235148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,fp8,0,0.5785706837972006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,fp8,0,1.0775946776072185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,float16,0,0.5950719912846884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,fp8,0,0.5508853197097778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,float16,0,0.32131733496983844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,float16,0,1.1660266717274983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,float16,0,0.3465493520100911
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,fp8,0,0.28861866394678753
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,float16,0,0.3548266490300496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,fp8,0,0.30114134152730304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,float16,0,0.3279946645100911
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,fp8,0,0.28618133068084717
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,fp8,0,0.05193600058555603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,float16,0,0.3349546591440837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,float16,0,0.18534932533899942
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,float16,0,0.18450133005777994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,fp8,0,0.1630346675713857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,fp8,0,0.16261866688728333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,fp8,0,0.578165332476298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,float16,0,0.1880906621615092
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,float16,0,0.16595199704170227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,fp8,0,0.16220266620318094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,fp8,0,0.16180800398190817
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,float16,0,0.1782240072886149
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,float16,0,0.0993386705716451
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,fp8,0,0.0937013328075409
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,fp8,0,0.08841600020726521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,float16,0,0.10116799672444661
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,fp8,0,0.091648002465566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,float16,0,0.10377599795659383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,fp8,0,1.0795093377431233
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,fp8,0,0.30110933383305866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,fp8,0,0.08516266942024231
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,float16,0,0.1011253297328949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,fp8,0,0.09136000275611877
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,float16,0,0.06118933359781901
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,fp8,0,0.05632533133029938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,float16,0,0.061946665247281395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,fp8,0,0.05243200063705444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,float16,0,0.05926933387915293
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,fp8,0,0.301253338654836
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,float16,0,0.06136000156402588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,fp8,0,0.05571199953556061
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,float16,0,0.05606933434804281
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,fp8,0,0.0544106662273407
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,fp8,0,0.15537066260973612
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,fp8,0,0.03736533224582672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,float16,0,0.03799466788768768
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,float16,0,0.037632000943024956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,fp8,0,0.037263999382654824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,fp8,0,0.037461332976818085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,float16,0,0.037402667105197906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,float16,0,0.039061332742373146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,fp8,0,0.03734400123357773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,float16,0,0.09852266311645508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,float16,0,1.2250560124715169
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,fp8,0,1.1365386644999187
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,float16,0,1.2241066296895344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,fp8,0,0.05602133274078369
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,float16,0,1.4214240709940593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,float16,0,1.2316693464914958
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,fp8,0,1.1461280186971028
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,float16,0,0.03938666731119156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,fp8,0,1.1394240061442058
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,float16,0,0.6386666695276896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,fp8,0,0.0373333344856898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,float16,0,0.6201333204905192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,float16,0,0.7027573585510254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,fp8,0,0.5764319896697998
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,fp8,0,0.6006346543629965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,fp8,0,0.576197346051534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,float16,0,0.7172373135884603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,fp8,0,0.605135997136434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,float16,0,0.6250720024108887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,fp8,0,0.5867040157318115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,fp8,0,1.1365919907887776
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,float16,0,0.3195893367131551
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,float16,0,0.3286133408546448
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,fp8,0,0.31229867537816364
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,fp8,0,0.3102239966392517
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,float16,0,0.33528534571329754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,fp8,0,0.31063999732335407
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,fp8,0,0.31141867240269977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,fp8,0,0.30011733373006183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,float16,0,0.18478399515151978
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,float16,0,0.18603734175364176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,fp8,0,0.15869866808255514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,float16,0,0.17408533891042074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,fp8,0,0.1572426656881968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,fp8,0,0.1662773291269938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,fp8,0,0.16486933827400208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,float16,0,0.16971200704574585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,fp8,0,0.1605226695537567
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,float16,0,0.10289067029953003
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,fp8,0,0.09132267038027446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,fp8,0,0.08771199981371562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,float16,0,0.09679999947547913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,float16,0,0.0983733336130778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,fp8,0,0.08714133501052856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,float16,0,0.10006933410962422
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,fp8,0,0.09154133001963298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,float16,0,0.35231467088063556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,fp8,0,0.051498666405677795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,float16,0,0.057536001006762184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,fp8,0,0.05390933156013489
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,float16,0,0.32465600967407227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,float16,0,0.05595199763774872
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,float16,0,0.059903999169667564
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,fp8,0,0.05264533559481303
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,float16,0,0.05792533357938131
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,fp8,0,0.05226666728655497
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,fp8,0,0.05373866856098175
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,float16,0,0.03790933390458425
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,fp8,0,0.034874667723973594
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,float16,0,0.16993600130081177
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,float16,0,0.03769599894682566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,fp8,0,0.03455466777086258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,float16,0,0.035429333647092186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,fp8,0,0.0348693331082662
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,float16,0,0.03917866696914037
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,fp8,0,0.03469866762558619
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,float16,0,0.035429333647092186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,float16,0,0.02378133436044057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,fp8,0,0.09114133318265279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,fp8,0,0.02126399924357732
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,float16,0,0.023157333334287006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,float16,0,0.02298133323589961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,float16,0,0.022917332748572033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,fp8,0,0.021029333273569744
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,float16,0,0.023370665808518726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,fp8,0,0.02107200026512146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,float16,0,0.05681600173314413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,float16,0,0.9082880020141602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,fp8,0,0.8529173533121744
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,fp8,0,0.03501333296298981
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,float16,0,0.9185547033945719
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,fp8,0,0.022597332795461018
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,float16,0,0.09690666198730469
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,fp8,0,0.8387253284454346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,float16,0,0.9074347019195557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,float16,0,0.4721440076828003
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,fp8,0,0.8514506816864014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,fp8,0,0.42958935101826984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,float16,0,0.913429339726766
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,fp8,0,0.4374133348464966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,float16,0,0.4596373240152995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,fp8,0,0.4404533306757609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,fp8,0,0.8396960099538168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,float16,0,0.4631253480911255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,fp8,0,0.02202133337656657
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,float16,0,0.4636960029602051
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,fp8,0,0.4262453317642212
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,fp8,0,0.22705066204071045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,float16,0,0.24369599421819052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,float16,0,0.25360532601674396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,float16,0,0.23859200874964395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,fp8,0,0.4254719813664754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,fp8,0,0.22906132539113364
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,float16,0,0.24940266211827597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,float16,0,0.2529226740201314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,fp8,0,0.2200266718864441
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,fp8,0,0.21988266706466675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,float16,0,0.12706133723258972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,fp8,0,0.12272000312805176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,fp8,0,0.121888001759847
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,fp8,0,0.2218559980392456
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,float16,0,0.13388267159461975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,float16,0,0.1263146698474884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,float16,0,0.12974400321642557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,fp8,0,0.11690133810043335
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,fp8,0,0.11572266618410747
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,float16,0,0.13357333342234293
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,float16,0,0.0758133331934611
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,fp8,0,0.1181173324584961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,float16,0,0.07374399900436401
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,float16,0,0.07075733443101247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,fp8,0,0.06599466502666473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,fp8,0,0.06772799789905548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,float16,0,0.0703306645154953
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,fp8,0,0.06483733157316844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,float16,0,0.4763520161310832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,float16,0,0.04763199885686239
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,float16,0,0.04417066772778829
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,float16,0,0.043680002291997276
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,float16,0,0.04364266494909922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,fp8,0,0.03946666667858759
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,fp8,0,0.03938666731119156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,fp8,0,0.038047999143600464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,float16,0,0.04381333291530609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,float16,0,0.02775999903678894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,fp8,0,0.025413334369659424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,fp8,0,0.02475200096766154
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,fp8,0,0.06778133412202199
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,fp8,0,0.025418666501839954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,fp8,0,0.025066666305065155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,float16,0,0.07572266459465027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,fp8,0,0.06795200208822887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,float16,0,0.026922665536403656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,fp8,0,0.025274666647116344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,float16,0,0.017312000195185345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,fp8,0,0.017173333714405697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,fp8,0,0.03765333443880081
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,fp8,0,0.015263999501864115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,fp8,0,0.03802666564782461
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,float16,0,0.017386666188637417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,float16,0,0.017093333105246227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,float16,0,0.017850667238235474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,fp8,0,0.01692266638080279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,float16,0,0.016437333077192307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,float16,0,0.015087999403476715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,fp8,0,0.015135999768972397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,float16,0,0.027242665489514668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,float16,0,0.015103999525308609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,fp8,0,0.015103999525308609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,float16,0,0.0281333327293396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,float16,0,0.016757333030303318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,float16,0,0.014991999914248785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,fp8,0,0.015072000523408255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,float16,0,0.026874666412671406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,float16,0,0.38170135021209717
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,fp8,0,0.344490647315979
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,float16,0,0.3685813347498576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,fp8,0,0.3489813407262166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,float16,0,0.017322666943073273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,fp8,0,0.016879999389251072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,float16,0,0.36953067779541016
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,fp8,0,0.1767359972000122
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,float16,0,0.19774933656056723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,fp8,0,0.3495519955952962
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,fp8,0,0.014890667051076889
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,float16,0,0.19708800315856934
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,float16,0,0.3710453510284424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,fp8,0,0.34490132331848145
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,fp8,0,0.18293867508570352
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,float16,0,0.19781333208084106
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,fp8,0,0.1816533406575521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,float16,0,0.19089066982269287
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,fp8,0,0.1840160091718038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,float16,0,0.19710934162139893
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,float16,0,0.10661333799362183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,fp8,0,0.09281599521636963
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,float16,0,0.10386666655540466
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,float16,0,0.1050986647605896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,fp8,0,0.09451733032862346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,fp8,0,0.17813867330551147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,float16,0,0.10608533024787903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,fp8,0,0.09647466739018758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,fp8,0,0.09265599648157756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,float16,0,0.05910933514436086
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,fp8,0,0.05007466673851013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,float16,0,0.056074668963750206
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,float16,0,0.10172266761461894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,fp8,0,0.05189333359400431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,float16,0,0.05677333474159241
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,fp8,0,0.05099200208981832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,float16,0,0.05985066791375478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,fp8,0,0.05193600058555603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,fp8,0,0.05232533315817515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,fp8,0,0.03162133445342382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,float16,0,0.035605333745479584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,fp8,0,0.03151999910672506
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,fp8,0,0.031632001201311745
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,float16,0,0.0349440003434817
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,fp8,0,0.03128000100453695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,fp8,0,0.03138133386770884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,float16,0,0.021269333859284718
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,fp8,0,0.01918399954835574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,fp8,0,0.018944000204404194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,fp8,0,0.09492799639701843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,float16,0,0.020992000897725422
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,float16,0,0.0721919983625412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,float16,0,0.020960000654061634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,fp8,0,0.019205333044131596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,fp8,0,0.019109333554903667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,float16,0,0.014954666296641031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,float16,0,0.03597866743803024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,float16,0,0.013568000247081121
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,float16,0,0.03346133232116699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,float16,0,0.013013333082199097
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,float16,0,0.01312000056107839
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,float16,0,0.012725333372751871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,float16,0,0.020901332298914593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,float16,0,0.012736000120639801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,fp8,0,0.012666666259368261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,float16,0,0.012778667112191519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,float16,0,0.035274667044480644
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,float16,0,0.012725333372751871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,float16,0,0.012714666624863943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,fp8,0,0.012741333494583765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,float16,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,fp8,0,0.012730666746695837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,float16,0,0.01314666618903478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,float16,0,0.02128533273935318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,fp8,0,0.01268799975514412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,float16,0,0.012693333129088083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,float16,0,0.012752000242471695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,float16,0,0.23802665869394937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,float16,0,0.013088000317414602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,fp8,0,0.22610666354497275
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,float16,0,0.23920534054438272
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,fp8,0,0.22695465882619223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,float16,0,0.2407253384590149
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,fp8,0,0.22469866275787354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,float16,0,0.012991999586423239
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,float16,0,0.23702933390935263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,float16,0,0.12897599736849466
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,fp8,0,0.22578666607538858
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,fp8,0,0.11751466989517212
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,float16,0,0.13133333126703897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,fp8,0,0.11709333459536235
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,float16,0,0.1237386663754781
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,fp8,0,0.11745066444079082
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,float16,0,0.1237546702226003
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,float16,0,0.12370666861534119
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,fp8,0,0.06289066871007283
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,fp8,0,0.06211733321348826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,float16,0,0.0660159985224406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,fp8,0,0.1199679970741272
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,float16,0,0.06797866523265839
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,float16,0,0.06724800169467926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,fp8,0,0.06214933097362518
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,float16,0,0.06858666737874348
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,fp8,0,0.06340266764163971
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,fp8,0,0.03528533379236857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,float16,0,0.037615999579429626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,float16,0,0.037647999823093414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,float16,0,0.03718400001525879
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,fp8,0,0.11922132968902588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,fp8,0,0.03450666616360346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,fp8,0,0.03344533344109853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,fp8,0,0.03428266694148382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,float16,0,0.037018666664759316
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,float16,0,0.0373333344856898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,float16,0,0.06818133095900218
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,float16,0,0.023344000180562336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,float16,0,0.023024000227451324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,fp8,0,0.021898667017618816
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,fp8,0,0.02274133265018463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,float16,0,0.023370665808518726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,fp8,0,0.02237333357334137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,float16,0,0.02330133318901062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,fp8,0,0.022624000906944275
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,float16,0,0.022917332748572033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,float16,0,0.015194666882356008
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,fp8,0,0.021957332889238994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,float16,0,0.014864000181357065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,float16,0,0.015072000523408255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,float16,0,0.015119999647140503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,fp8,0,0.014874666929244995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,float16,0,0.010965333630641302
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,fp8,0,0.010874666273593903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,float16,0,0.010928000013033548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,fp8,0,0.03346133232116699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,float16,0,0.01073066641887029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,fp8,0,0.010816000401973724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,float16,0,0.01101333275437355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,fp8,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,float16,0,0.010981333752473196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,fp8,0,0.010661333799362183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,float16,0,0.0107893335322539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,fp8,0,0.010656000425418219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,float16,0,0.010543999572594961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,fp8,0,0.010672000547250112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,float16,0,0.010608000059922537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,fp8,0,0.010757333288590113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,fp8,0,0.061887999375661217
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,fp8,0,0.009269333134094873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,float16,0,0.00960533320903778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,float16,0,0.01073066641887029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,float16,0,0.010645333677530289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,float16,0,0.010672000547250112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,float16,0,0.010698666175206503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,float16,0,0.0106133334338665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,float16,0,0.20719999074935913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,fp8,0,0.1922559936841329
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,float16,0,0.20105600357055664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,fp8,0,0.19125332434972128
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,float16,0,0.20517865816752115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,fp8,0,0.014853333433469137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,fp8,0,0.19196800390879312
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,fp8,0,0.009695999945203463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,float16,0,0.20182400941848755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,float16,0,0.1079253355662028
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,fp8,0,0.10048000017801921
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,fp8,0,0.1912320057551066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,float16,0,0.11136000355084737
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,fp8,0,0.09938666224479675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,fp8,0,0.09919466574986775
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,float16,0,0.01091733326514562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,fp8,0,0.10152533650398254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,float16,0,0.10897066195805867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,fp8,0,0.10059199730555217
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,float16,0,0.05743999779224396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,float16,0,0.05880533158779144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,fp8,0,0.05403199791908264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,fp8,0,0.0529120018084844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,fp8,0,0.05337599913279215
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,fp8,0,0.0537066658337911
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,float16,0,0.014853333433469137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,fp8,0,0.05400000015894572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,float16,0,0.057909334699312844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,float16,0,0.032560000816980995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,fp8,0,0.02941333254178365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,float16,0,0.03331200033426285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,fp8,0,0.02920000006755193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,float16,0,0.0317493329445521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,float16,0,0.10755200187365214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,float16,0,0.032826667030652366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,fp8,0,0.029167999823888142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,fp8,0,0.029343999922275543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,float16,0,0.032960000137488045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,float16,0,0.021007999777793884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,fp8,0,0.01915733392039935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,fp8,0,0.01926933353145917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,float16,0,0.02089600016673406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,fp8,0,0.019199999670187633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,float16,0,0.02092266579469045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,float16,0,0.05648533503214518
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,fp8,0,0.018906666586796444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,float16,0,0.021018666525681812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,fp8,0,0.01897066707412402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,float16,0,0.013088000317414602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,float16,0,0.014762666076421738
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,float16,0,0.013162666310866674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,float16,0,0.014544000228246054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,fp8,0,0.02938666691382726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,float16,0,0.014848000059525171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,float16,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,fp8,0,0.009461333354314169
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,fp8,0,0.010575999816258749
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,float16,0,0.010640000303586325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,float16,0,0.10794666409492493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,float16,0,0.010762666662534079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,fp8,0,0.010608000059922537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,float16,0,0.021274665991465252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,fp8,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,float16,0,0.010863999525705973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,float16,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,fp8,0,0.008832000195980072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,float16,0,0.00973866693675518
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,float16,0,0.05776533484458923
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,float16,0,0.009130666653315226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,float16,0,0.009072000160813332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,float16,0,0.00873066671192646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,float16,0,0.010677333921194077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,float16,0,0.18741865952809653
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,float16,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,float16,0,0.18235733111699423
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,fp8,0,0.17277334133783975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,float16,0,0.009413333609700203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,float16,0,0.18505066633224487
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,float16,0,0.009088000282645226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,fp8,0,0.17375467220942178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,float16,0,0.18306134144465128
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,float16,0,0.09757866462071736
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,fp8,0,0.17269867658615112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,float16,0,0.09943999846776326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,fp8,0,0.09096533060073853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,float16,0,0.09734400113423665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,fp8,0,0.09169600407282512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,fp8,0,0.09122666716575623
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,float16,0,0.09690666198730469
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,float16,0,0.09692266583442688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,fp8,0,0.09063466389973958
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,fp8,0,0.17271467049916586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,fp8,0,0.04966400067011515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,fp8,0,0.04854933420817057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,float16,0,0.055311997731526695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,fp8,0,0.04937066634496053
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,float16,0,0.054058666030565895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,fp8,0,0.04966400067011515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,float16,0,0.05195199946562449
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,fp8,0,0.048613334695498146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,float16,0,0.031152000029881794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,float16,0,0.031311998764673867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,fp8,0,0.027210667729377747
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,fp8,0,0.02737066646416982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,float16,0,0.031194667021433514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,fp8,0,0.027424000203609467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,float16,0,0.031317333380381264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,fp8,0,0.027424000203609467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,fp8,0,0.09095999598503113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,float16,0,0.01937066639463107
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,fp8,0,0.01894933357834816
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,fp8,0,0.01926400015751521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,float16,0,0.01918399954835574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,float16,0,0.019061333189407986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,fp8,0,0.01893866683046023
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,float16,0,0.05563733478387197
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,float16,0,0.01915733392039935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,float16,0,0.05398400127887726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,fp8,0,0.019061333189407986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,fp8,0,0.012719999998807907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,float16,0,0.013023999830087027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,float16,0,0.01301866645614306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,fp8,0,0.012629333883523941
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,float16,0,0.03150933235883713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,float16,0,0.013167999684810638
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,float16,0,0.010682666053374609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,float16,0,0.019109333554903667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,float16,0,0.010784000158309937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,float16,0,0.010784000158309937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,fp8,0,0.01884799947341283
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,fp8,0,0.009690666571259499
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,float16,0,0.01062400018175443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,float16,0,0.010565333068370819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,float16,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,float16,0,0.009082666908701261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,float16,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,float16,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,fp8,0,0.027306665976842243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,float16,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,float16,0,0.012975999464591345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,fp8,0,0.008799999952316284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,float16,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,float16,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,float16,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,fp8,0,0.008661333471536636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,fp8,0,0.16755733887354532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,float16,0,0.17873066663742065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,float16,0,0.18130133549372354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,fp8,0,0.16637333234151205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,float16,0,0.18438933293024698
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,fp8,0,0.1665600041548411
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,float16,0,0.18040533860524496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,float16,0,0.008858666444818178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,float16,0,0.012784000486135483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,float16,0,0.09733866651852925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,float16,0,0.09804266691207886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,fp8,0,0.0876693328221639
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,fp8,0,0.0881866713364919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,float16,0,0.09872000416119893
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,fp8,0,0.08845333258310954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,fp8,0,0.08810133735338847
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,float16,0,0.09496532877286275
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,float16,0,0.05401599903901418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,fp8,0,0.08672533432642619
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,fp8,0,0.04774933556715647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,fp8,0,0.048250665267308555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,float16,0,0.054832001527150474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,float16,0,0.05372266471385956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,float16,0,0.05187733471393585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,fp8,0,0.04754666487375895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,float16,0,0.05202133456865946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,fp8,0,0.04786666731039683
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,fp8,0,0.02756800005833308
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,float16,0,0.03127466638882955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,fp8,0,0.02739199995994568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,float16,0,0.031013332307338715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,float16,0,0.030245333909988403
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,fp8,0,0.027471999327341717
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,float16,0,0.02945599953333537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,fp8,0,0.02739199995994568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,fp8,0,0.02749866743882497
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,float16,0,0.030394665896892548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,float16,0,0.019237333287795384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,fp8,0,0.16798933347066244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,fp8,0,0.018794666975736618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,float16,0,0.01922133316596349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,float16,0,0.019280000279347103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,float16,0,0.019141333798567455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,fp8,0,0.01876266673207283
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,float16,0,0.09340266386667888
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,float16,0,0.012789333860079447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,float16,0,0.013173333058754602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,fp8,0,0.012719999998807907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,fp8,0,0.04780800143877665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,fp8,0,0.012655999511480331
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,float16,0,0.013173333058754602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,float16,0,0.013077333569526672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,float16,0,0.010682666053374609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,float16,0,0.010784000158309937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,fp8,0,0.009119999905427298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,float16,0,0.00966933307548364
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,fp8,0,0.019007999449968338
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,float16,0,0.010714666297038397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,fp8,0,0.017658667018016178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,fp8,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,float16,0,0.010842667271693548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,float16,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,float16,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,float16,0,0.013066666821638743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,float16,0,0.00884799969693025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,fp8,0,0.008629333227872849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,float16,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,float16,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,fp8,0,0.009114666531483332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,fp8,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,float16,0,0.008714666590094566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,fp8,0,0.00877333308259646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,1,128,1,float16,float16,0,0.012917333592971167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,float16,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,1,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,0,0.01488000030318896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,0,0.027061333258946735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,0,0.023397333920001984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,fp8,0,0.01903466631968816
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,fp8,0,0.03942399968703588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,1,128,1,float16,float16,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,float16,0,0.05227733155091604
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,1,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,0,0.010778666784365972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,0,0.012608000387748083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,0,0.017071999609470367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,0,0.014725333700577417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,1,128,1,float16,float16,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,0,0.014250667144854864
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,fp8,0,0.02292799949645996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,1,128,1,float16,fp8,0,0.008629333227872849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,0,0.0069759997228781385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,0,0.016949333250522614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,0,0.011226666470368704
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,0,0.010879999647537867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,1,128,1,float16,float16,0,0.008623999853928884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,1,128,1,float16,fp8,0,0.008853333070874214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,float16,0,0.016976000120242436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,0,0.0074346667776505155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,0,0.008656000097592672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,0,0.007002666592597961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,float16,0,0.01942933350801468
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,0,0.009103999783595404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,0,0.008778666456540426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,1,128,1,float16,float16,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,1,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,0,0.009061333412925402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,0,0.007402666533986728
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,float16,0,0.011610666910807291
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,fp8,0,0.010703999549150467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,0,0.006762666627764702
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,0,0.0068693334857622785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,0,0.007936000203092894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,0,0.006618666773041089
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,1,128,1,float16,float16,0,0.008634666601816813
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,0,0.006895999734600385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,fp8,0,0.009317333499590555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,float16,0,0.027317332724730175
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,0,0.006911999856432279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,0,0.006965333595871925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,0,0.006693333387374878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,1,128,1,float16,float16,0,0.008810666700204214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,1,128,1,float16,fp8,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,float16,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,fp8,0,0.006949333474040031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,0,0.007040000210205714
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,0,0.008373333141207695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,0,0.0068693334857622785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,fp8,0,0.014752000570297241
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,0,0.006853333363930385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,0,0.00706666645904382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,0,0.007007999966541926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,0,0.006624000146985054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,1,128,1,float16,float16,0,0.008874666566650072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,1,128,1,float16,fp8,0,0.008858666444818178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,0,0.0069919998447100324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,float16,0,0.006853333363930385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,0,0.006890666360656421
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,0,0.007034666836261749
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,0,0.006693333387374878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,0,0.007050666958093643
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,0,0.006746666505932808
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,1,128,1,float16,float16,0,0.00879466657837232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,1,128,1,float16,fp8,0,0.008629333227872849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,float16,0,0.006853333363930385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,fp8,0,0.006725333631038666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,0,0.006741333131988843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,1,128,1,float16,fp8,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,0,0.006810666372378667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,0,0.006927999978264173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,0,0.006682666639486949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,0,0.006698666761318843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,0,0.006927999978264173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,fp8,0,0.006895999734600385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,0,0.0069440001000960665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,float16,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,fp8,0,7.628063837687175
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,float16,0,8.710437138875326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,float16,0,10.72991943359375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,float16,0,4.206437428792317
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,fp8,0,3.8888959884643555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,fp8,0,7.632106781005859
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,fp8,0,7.627056121826172
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,float16,0,8.27131207784017
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,float16,0,5.450421651204427
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,fp8,0,7.624928156534831
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,fp8,0,3.8733654022216797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,float16,0,4.676191965738933
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,fp8,0,4.128671964009603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,fp8,0,3.8761440912882485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,float16,0,4.456591924031575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,fp8,0,3.8727572758992515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,float16,0,9.313231786092123
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,float16,0,2.1690026919047036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,float16,0,4.584501266479492
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,float16,0,2.613706588745117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,fp8,0,2.255114714304606
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,fp8,0,2.1219679514567056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,fp8,0,2.120896021525065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,float16,0,2.6459253629048667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,float16,0,2.6410346031188965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,fp8,0,2.120464007059733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,float16,0,2.189290682474772
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,float16,0,1.1437439918518066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,fp8,0,1.1205600102742512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,float16,0,1.3727787335713704
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,float16,0,1.3510133425394695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,fp8,0,1.0598986943562825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,float16,0,1.3761332829793294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,fp8,0,1.1699679692586262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,fp8,0,4.413834571838379
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,float16,0,5.473791758219401
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,float16,0,1.1615573565165203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,fp8,0,4.736469268798828
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,float16,0,6.314138412475586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,float16,0,4.774922688802083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,fp8,0,4.853647867838542
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,float16,0,4.776864051818848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,fp8,0,4.418357213338216
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,float16,0,3.3183466593424478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,fp8,0,1.0579253037770588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,fp8,0,2.255930741628011
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,float16,0,2.952416102091471
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,fp8,0,2.4034345944722495
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,float16,0,2.4395519892374673
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,fp8,0,2.4515040715535483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,float16,0,2.440831979115804
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,fp8,0,2.4027253786722818
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,float16,0,1.2738080024719238
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,fp8,0,2.2529600461324057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,fp8,0,1.2571252981821697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,fp8,0,1.1719146569569905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,float16,0,1.5489919980367024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,fp8,0,1.1716533501942952
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,float16,0,1.5378400484720867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,fp8,0,1.172005335489909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,float16,0,1.5231040318806965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,float16,0,1.2715306282043457
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,fp8,0,1.247930685679118
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,float16,0,0.8081759611765543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,float16,0,0.6872053146362305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,fp8,0,0.6304426590601603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,fp8,0,0.6326239903767904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,float16,0,0.7903573513031006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,fp8,0,0.7115786870320638
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,fp8,0,0.6317386627197266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,float16,0,0.8111786842346191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,fp8,0,0.674453337987264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,float16,0,2.8565759658813477
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,fp8,0,3.1367626190185547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,float16,0,3.9977601369222007
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,fp8,0,3.3458452224731445
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,float16,0,4.0092159907023115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,fp8,0,1.1746400197347004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,float16,0,3.394746780395508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,fp8,0,3.137653350830078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,float16,0,0.6863733132680258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,float16,0,3.5486933390299478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,fp8,0,3.4278345108032227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,fp8,0,1.6092640558878581
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,float16,0,2.144159952799479
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,fp8,0,1.7167840003967285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,float16,0,1.757754643758138
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,fp8,0,1.6148746808369954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,float16,0,1.8525439898173015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,fp8,0,1.7479359308878581
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,fp8,0,1.62444273630778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,fp8,0,2.398922602335612
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,float16,0,0.9196586608886719
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,float16,0,0.9499573707580566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,fp8,0,0.8971253236134847
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,fp8,0,0.8448853492736816
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,float16,0,0.9120426972707113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,float16,0,0.9539413452148438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,fp8,0,0.8994399706522623
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,float16,0,0.9118346373240153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,float16,0,1.7554559707641602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,fp8,0,0.8429333368937174
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,float16,0,0.5026079813639323
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,float16,0,0.5339306592941284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,fp8,0,0.493125319480896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,fp8,0,0.4728586673736572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,float16,0,0.5105546712875366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,fp8,0,0.4907093445460002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,float16,0,0.4997013409932454
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,fp8,0,0.46173866589864093
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,float16,0,1.7375359535217285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,float16,0,4.665674527486165
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,fp8,0,4.333045323689778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,fp8,0,0.8984959920247396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,fp8,0,4.083776156107585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,float16,0,4.9280961354573565
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,fp8,0,0.45899732907613117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,float16,0,0.5009386539459229
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,float16,0,4.400277455647786
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,float16,0,2.2481600443522134
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,fp8,0,4.3622134526570635
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,fp8,0,2.1734186808268228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,float16,0,2.2359253565470376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,fp8,0,2.0735573768615723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,float16,0,4.405850728352864
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,float16,0,2.235098679860433
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,fp8,0,2.2230079968770347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,float16,0,2.2463040351867676
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,fp8,0,2.230682690938314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,float16,0,2.239232063293457
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,float16,0,1.1586133639017742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,fp8,0,1.070677359898885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,float16,0,1.1549332936604817
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,fp8,0,1.1405920187632244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,float16,0,1.2362453142801921
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,fp8,0,1.141701300938924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,fp8,0,1.0713813304901123
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,float16,0,1.1618560155232747
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,fp8,0,1.1419520378112793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,float16,0,0.6173493464787801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,float16,0,0.6152426799138387
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,fp8,0,0.5681973298390707
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,fp8,0,0.6047840118408203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,float16,0,0.6542506615320841
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,fp8,0,0.5671360095342001
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,fp8,0,4.087434768676758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,fp8,0,0.5665653149286906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,fp8,0,0.5682400067647299
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,float16,0,0.6148320039113363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,float16,0,0.39053865273793537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,float16,0,0.3454666535059611
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,float16,0,0.3420373201370239
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,fp8,0,0.31863999366760254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,fp8,0,0.3373813231786092
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,fp8,0,0.31692800919214886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,fp8,0,2.0754987398783364
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,float16,0,0.34544531504313153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,fp8,0,0.3158506751060486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,float16,0,1.2306880156199138
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,float16,0,2.5882879892985025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,fp8,0,2.57914129892985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,float16,0,2.5980480511983237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,fp8,0,2.4099626541137695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,float16,0,0.6569226582845052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,float16,0,3.127749443054199
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,fp8,0,0.33644266923268634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,float16,0,1.3331200281778972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,float16,0,2.6415093739827475
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,fp8,0,2.5752693812052407
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,float16,0,0.3438026507695516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,fp8,0,1.317423979441325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,fp8,0,1.315344015757243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,float16,0,1.732063929239909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,float16,0,1.3228533267974854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,float16,0,1.6089706420898438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,float16,0,1.4078133900960286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,fp8,0,1.313157320022583
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,float16,0,0.8203306992848715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,fp8,0,0.6445333162943522
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,fp8,0,0.6839839617411295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,float16,0,0.8038880030314127
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,fp8,0,0.6843199729919434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,float16,0,0.8261280059814453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,fp8,0,0.686346689860026
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,fp8,0,2.4091787338256836
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,float16,0,0.6915520032246908
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,fp8,0,0.6854133605957031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,float16,0,0.43108801047007245
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,fp8,0,0.34727466106414795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,float16,0,0.37773334980010986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,float16,0,0.37409599622090656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,fp8,0,0.37515731652577716
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,fp8,0,1.3178186416625977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,float16,0,0.43122665087382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,fp8,0,0.3473120133082072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,fp8,0,0.3715626796086629
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,float16,0,0.37509334087371826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,float16,0,0.24201067288716635
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,fp8,0,0.21508800983428955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,float16,0,0.21926933526992798
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,float16,0,0.21984533468882242
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,fp8,0,0.20065067211786905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,float16,0,0.24110400676727295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,fp8,0,0.2003999948501587
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,fp8,0,0.19824000199635824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,float16,0,0.23296533028284708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,float16,0,2.61898136138916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,float16,0,2.4684425989786782
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,fp8,0,0.36840001742045086
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,fp8,0,2.465754667917887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,fp8,0,1.2318720022837322
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,fp8,0,2.463733355204264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,float16,0,0.7325812975565592
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,float16,0,2.6261547406514487
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,fp8,0,2.311903953552246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,float16,0,1.2725173632303874
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,float16,0,1.2526986598968506
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,float16,0,1.255621353785197
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,fp8,0,1.1803146998087566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,fp8,0,1.1729706923166912
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,float16,0,1.2560373147328694
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,fp8,0,1.174336036046346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,float16,0,1.2601386706034343
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,fp8,0,1.1778613726298015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,float16,0,0.6563040018081665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,fp8,0,0.6092640161514282
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,fp8,0,0.6467039982477824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,float16,0,2.5780693689982095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,fp8,0,0.21417067448298135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,float16,0,0.757525364557902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,float16,0,0.6496320168177286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,fp8,0,0.6457706689834595
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,float16,0,0.6514559984207153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,fp8,0,0.6456640164057413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,fp8,0,1.222266674041748
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,float16,0,0.3906986713409424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,fp8,0,0.34677334626515705
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,float16,0,0.3510773181915283
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,fp8,0,0.34268800417582196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,fp8,0,0.3465493520100911
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,float16,0,0.3722879886627197
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,float16,0,0.3657120068868001
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,fp8,0,2.4063092867533364
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,float16,0,0.3470986684163411
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,fp8,0,0.3460959990819295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,float16,0,0.1978666583697001
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,float16,0,0.2207039992014567
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,fp8,0,0.18265066544214884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,float16,0,0.19543466965357462
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,fp8,0,0.18043200174967447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,float16,0,0.7705439726511637
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,fp8,0,0.19291200240453085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,float16,0,0.19667200247446695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,fp8,0,0.6452906529108683
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,float16,0,0.13091733058293661
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,float16,0,0.11928000052769978
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,fp8,0,0.11748799681663513
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,fp8,0,0.11587199568748474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,float16,0,0.12989866733551025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,fp8,0,0.11595199505488078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,float16,0,0.12225066622098286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,fp8,0,0.11700800061225891
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,float16,0,0.11621866623560588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,fp8,0,0.3452213207880656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,float16,0,1.499359925587972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,fp8,0,1.4953014055887859
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,fp8,0,0.1950613260269165
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,fp8,0,1.4069172541300456
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,fp8,0,0.1933120091756185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,float16,0,1.5496533711751301
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,fp8,0,1.4063787460327148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,float16,0,1.5027039845784504
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,fp8,0,1.412725289662679
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,float16,0,0.7773760159810384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,float16,0,0.9018399715423584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,fp8,0,0.7234506607055664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,fp8,0,0.7636693318684896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,fp8,0,0.1148373285929362
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,float16,0,0.9006240367889404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,fp8,0,0.7622880140940348
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,float16,0,0.9036746819814047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,fp8,0,0.762336015701294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,float16,0,0.7684693336486816
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,fp8,0,0.7640480200449625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,float16,0,1.495914618174235
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,float16,0,0.39925865332285565
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,fp8,0,0.3782026767730713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,fp8,0,0.39747734864552814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,float16,0,0.4002773364384969
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,float16,0,0.2137706677118937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,fp8,0,0.3733813365300496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,float16,0,0.40854934851328534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,fp8,0,0.39906132221221924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,fp8,0,0.37544000148773193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,float16,0,0.4402773380279541
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,float16,0,0.2435093323389689
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,fp8,0,0.2023680011431376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,float16,0,0.2177226742108663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,fp8,0,0.21710399786631265
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,fp8,0,0.20548266172409058
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,float16,0,0.2379146615664164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,fp8,0,0.2171199917793274
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,float16,0,0.21921600898106894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,fp8,0,0.2169653375943502
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,float16,0,0.13501333196957907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,float16,0,0.14046399792035422
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,fp8,0,0.12659733494122824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,fp8,0,0.12324266632397969
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,fp8,0,0.11771733562151591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,float16,0,0.13595199584960938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,fp8,0,0.11611200372378032
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,fp8,0,0.12570133805274963
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,float16,0,0.12778133153915405
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,float16,0,0.08707200487454732
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,float16,0,0.08674133817354839
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,float16,0,0.08745599786440532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,fp8,0,0.07422400017579396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,fp8,0,0.07915733257929485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,float16,0,0.08648000160853068
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,fp8,0,0.07902400195598602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,fp8,0,0.07890666524569194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,float16,0,0.08072533210118611
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,float16,0,0.40647466977437335
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,float16,0,1.5717652638753254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,float16,0,0.22196267048517862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,fp8,0,1.4964799880981445
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,float16,0,1.5021653175354004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,float16,0,1.5657599767049153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,fp8,0,1.42084805170695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,float16,0,0.1290186643600464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,float16,0,1.508394718170166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,float16,0,0.7774933179219564
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,fp8,0,1.4988266626993816
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,float16,0,0.8953279654184977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,fp8,0,0.07423466444015503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,fp8,0,0.7279199759165446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,fp8,0,0.7206400235493978
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,float16,0,0.8635946909586588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,float16,0,0.7642666498819987
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,fp8,0,0.7606346607208252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,fp8,0,0.7619840304056803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,fp8,0,0.761573314666748
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,float16,0,0.7673013210296631
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,float16,0,0.401962677637736
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,fp8,0,0.3720746835072835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,float16,0,0.4534613291422526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,fp8,0,0.3963093360265096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,fp8,0,0.37593066692352295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,float16,0,0.39645334084828693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,fp8,0,0.372325340906779
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,float16,0,0.39696534474690753
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,float16,0,0.23483733336130777
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,fp8,0,0.20111467440923056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,float16,0,0.21609600385030112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,fp8,0,0.2113813360532125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,float16,0,0.2113599975903829
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,float16,0,0.23277332385381064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,fp8,0,0.2128480076789856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,float16,0,0.22868265708287558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,fp8,0,0.19940799474716187
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,float16,0,0.12963199615478516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,fp8,0,1.4971200625101726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,fp8,0,0.1107360025246938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,float16,0,0.12597333391507468
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,fp8,0,0.11315733194351196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,float16,0,0.128629336754481
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,fp8,0,0.11775466799736023
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,float16,0,0.11983999609947205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,fp8,0,0.11902399857838948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,float16,0,0.12117333213488261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,float16,0,0.07663999994595845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,float16,0,0.4469706614812215
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,float16,0,0.08177599807580312
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,fp8,0,0.0664106657107671
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,float16,0,0.07690133154392242
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,fp8,0,0.07236800094445546
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,float16,0,0.07707733412583669
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,fp8,0,0.07188266515731812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,float16,0,0.07965333263079326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,fp8,0,0.06905066470305125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,float16,0,0.05402666827042898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,fp8,0,0.39523200194040936
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,fp8,0,0.048063998421033226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,float16,0,0.04993066688378652
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,fp8,0,0.04554666578769684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,fp8,0,0.19847466548283896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,fp8,0,0.04786666731039683
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,float16,0,0.04849066833655039
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,fp8,0,0.045498669147491455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,float16,0,0.04934399823347727
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,float16,0,0.9467306931813558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,fp8,0,0.9026506741841634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,float16,0,0.9474133650461832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,fp8,0,0.11952533324559529
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,fp8,0,0.07167999943097432
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,fp8,0,0.9027733008066813
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,float16,0,0.9520959854125977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,fp8,0,0.947551965713501
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,float16,0,0.9543573061625162
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,fp8,0,0.4668373266855876
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,float16,0,0.5391093492507935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,fp8,0,0.48496532440185547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,float16,0,0.49452801545461017
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,fp8,0,0.04753066599369049
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,fp8,0,0.48414933681488037
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,float16,0,0.5098079840342203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,float16,0,0.533408006032308
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,fp8,0,0.46164798736572266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,float16,0,0.5349599917729696
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,float16,0,0.2677546739578247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,fp8,0,0.4617280165354411
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,fp8,0,0.24305067459742227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,fp8,0,0.25305600961049396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,float16,0,0.2608853379885356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,float16,0,0.2545813322067261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,fp8,0,0.25430933634440106
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,fp8,0,0.2537013292312622
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,float16,0,0.28035199642181396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,fp8,0,0.24195732673009238
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,float16,0,0.25707199176152545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,float16,0,0.14949867129325867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,fp8,0,0.13963733116785684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,float16,0,0.14245866735776266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,float16,0,0.1523146629333496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,fp8,0,0.14043733477592468
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,float16,0,0.14166933298110962
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,fp8,0,0.13797332843144736
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,fp8,0,0.14038933316866556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,fp8,0,0.9052213033040365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,fp8,0,0.07620266576608022
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,fp8,0,0.07554666697978973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,float16,0,0.0827893316745758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,float16,0,0.07976000010967255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,fp8,0,0.07730133334795634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,fp8,0,0.0809333324432373
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,fp8,0,0.07898666461308797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,float16,0,0.051818668842315674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,fp8,0,0.04975999891757965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,float16,0,0.05532266696294149
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,fp8,0,0.04967466493447622
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,float16,0,0.055626665552457176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,fp8,0,0.04966933528582255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,float16,0,0.05482133229573568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,float16,0,0.049973333875338234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,fp8,0,0.1328426698843638
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,fp8,0,0.04953599969546
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,float16,0,0.1504853367805481
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,float16,0,0.03125333289305369
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,float16,0,0.03365333378314972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,fp8,0,0.028346667687098186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,float16,0,0.033546666304270424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,fp8,0,0.02951466788848241
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,fp8,0,0.029557332396507263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,float16,0,0.08277866741021474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,fp8,0,0.029487999776999157
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,float16,0,0.08156266808509827
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,float16,0,0.031184000273545582
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,float16,0,1.0522666772206624
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,fp8,0,1.0118026733398438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,fp8,0,0.050026665131251015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,float16,0,1.066480000813802
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,fp8,0,0.974730650583903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,float16,0,0.04971733192602793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,float16,0,1.0526293118794758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,float16,0,1.0595946311950684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,fp8,0,1.0034560362497966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,float16,0,0.03350399931271871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,fp8,0,0.02959466725587845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,float16,0,0.5287359952926636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,float16,0,0.5580586592356364
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,float16,0,0.5609333515167236
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,fp8,0,0.9772799809773763
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,fp8,0,0.5141599973042806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,fp8,0,0.4952266613642375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,fp8,0,0.5007040103276571
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,fp8,0,0.5168213446935018
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,float16,0,0.5722933212916056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,fp8,0,0.49673600991566974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,float16,0,0.5515146652857462
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,float16,0,0.2743840018908183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,float16,0,0.08692800005276997
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,float16,0,0.29179199536641437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,fp8,0,0.26636266708374023
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,float16,0,0.2679786682128906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,fp8,0,0.2600213289260864
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,fp8,0,0.2680906653404236
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,float16,0,0.26834134260813397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,fp8,0,0.26632533470789593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,float16,0,0.269978662331899
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,float16,0,0.15613866845766702
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,fp8,0,0.146096001068751
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,float16,0,0.1476906637350718
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,float16,0,0.1458080013593038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,fp8,0,0.14513066411018372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,fp8,0,0.1453120013078054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,float16,0,0.1444906691710154
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,fp8,0,0.1450453301270803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,float16,0,0.14476266503334045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,fp8,0,0.14299199978510538
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,float16,0,0.08668800195058186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,float16,0,0.08267733454704285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,fp8,0,0.0762613316377004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,fp8,0,0.07921599845091502
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,float16,0,0.08572799960772197
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,fp8,0,0.07645866771539052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,float16,0,0.08575999736785889
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,float16,0,0.05198933184146881
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,fp8,0,0.048170665899912514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,float16,0,0.05090666810671488
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,float16,0,0.051781331499417625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,fp8,0,0.046869332591692604
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,float16,0,0.04775466521581014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,fp8,0,0.04828266799449921
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,fp8,0,0.04640533526738485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,float16,0,0.04914666712284088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,float16,0,0.03728533287843069
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,fp8,0,0.2686186631520589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,fp8,0,0.03162666658560435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,fp8,0,0.03183466692765554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,float16,0,0.035573333501815796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,fp8,0,0.03329066683848699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,float16,0,0.03409600009520849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,fp8,0,0.03356266766786575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,float16,0,0.035461333890755974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,fp8,0,0.03145066648721695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,fp8,0,0.022997332115968067
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,float16,0,0.02518400053183238
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,fp8,0,0.08074133098125458
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,fp8,0,0.02325333406527837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,float16,0,0.02537599951028824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,fp8,0,0.023130667706330616
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,fp8,0,0.023376000424226124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,fp8,0,0.07653333246707916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,float16,0,0.023423999547958374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,fp8,0,0.023215999205907185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,fp8,0,0.04915200173854828
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,float16,0,0.7683893044789633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,float16,0,0.03508266558249792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,float16,0,0.787834644317627
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,fp8,0,0.7497599919637045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,float16,0,0.02510933329661687
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,float16,0,0.08434666196505229
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,float16,0,0.7714613278706869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,fp8,0,0.7493600050608317
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,float16,0,0.02334933231274287
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,float16,0,0.398965318997701
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,fp8,0,0.38388800621032715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,float16,0,0.39213867982228595
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,float16,0,0.7906239827473959
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,float16,0,0.4027093251546224
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,fp8,0,0.7494346300760905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,fp8,0,0.7496746381123861
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,fp8,0,0.38155198097229004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,fp8,0,0.39074134826660156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,float16,0,0.39325865109761554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,fp8,0,0.3810933430989583
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,float16,0,0.2038080096244812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,fp8,0,0.38186665376027423
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,fp8,0,0.19732266664505005
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,fp8,0,0.1994826594988505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,float16,0,0.20445332924524942
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,float16,0,0.21875200668970743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,fp8,0,0.19803200165430704
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,float16,0,0.4047466516494751
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,float16,0,0.11548800269762675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,8,128,1,float16,float16,0,0.20541866620381674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,8,128,1,float16,fp8,0,0.19847466548283896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,float16,0,0.11606933673222859
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,fp8,0,0.10702932874361674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,fp8,0,0.19735999902089438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,float16,0,0.11237866679827373
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,fp8,0,0.10992532968521118
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,fp8,0,0.1069599986076355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,float16,0,0.06745600203673045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,float16,0,0.11105066537857056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,fp8,0,0.06092800199985504
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,fp8,0,0.10946133732795715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,float16,0,0.06400533517201741
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,fp8,0,0.06163200239340464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,fp8,0,0.10970133543014526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,float16,0,0.2083359956741333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,fp8,0,0.06061333417892456
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,float16,0,0.06422399977842967
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,fp8,0,0.05983999868233999
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,float16,0,0.06424533327420552
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,float16,0,0.03761066744724909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,fp8,0,0.06224533418814341
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,fp8,0,0.03753600021203359
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,float16,0,0.03766933331886927
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,float16,0,0.03886933376391729
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,fp8,0,0.0358240008354187
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,float16,0,0.039450667798519135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,fp8,0,0.035573333501815796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,float16,0,0.03763733307520548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,fp8,0,0.03559466699759165
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,fp8,0,0.02532800038655599
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,float16,0,0.027114666998386383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,float16,0,0.025749333202838898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,float16,0,0.025349333882331848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,fp8,0,0.025066666305065155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,float16,0,0.11103999614715576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,float16,0,0.025306666890780132
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,fp8,0,0.025786665578683216
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,float16,0,0.06314133107662201
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,float16,0,0.019007999449968338
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,fp8,0,0.017210666090250015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,float16,0,0.018960000326236088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,fp8,0,0.035445332527160645
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,fp8,0,0.016970666746298473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,float16,0,0.01929066702723503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,float16,0,0.01911466692884763
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,fp8,0,0.02514133354028066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,fp8,0,0.025253333151340485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,float16,0,0.017157333592573803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,float16,0,0.01738133281469345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,fp8,0,0.016895999511082966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,float16,0,0.017077332983414333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,float16,0,0.01695999999841054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,fp8,0,0.01681600014368693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,float16,0,0.0170666662355264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,float16,0,0.016970666746298473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,float16,0,0.3222133318583171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,fp8,0,0.31990400950113934
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,float16,0,0.3274293343226115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,fp8,0,0.3230560024579366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,fp8,0,0.017194667210181553
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,float16,0,0.02735466758410136
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,float16,0,0.32390934228897095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,fp8,0,0.3230186700820923
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,float16,0,0.17041067282358804
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,float16,0,0.1711519956588745
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,float16,0,0.3243786692619324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,fp8,0,0.3200800021489461
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,fp8,0,0.16800532738367716
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,float16,0,0.1676959991455078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,float16,0,0.16873067617416382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,fp8,0,0.16693333784739176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,fp8,0,0.1676266590754191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,float16,0,0.1686506668726603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,float16,0,0.09487467010815938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,float16,0,0.09488000472386678
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,float16,0,0.09634666641553243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,fp8,0,0.0895146628220876
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,fp8,0,0.09099733829498291
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,float16,0,0.0926026701927185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,fp8,0,0.08876267075538635
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,float16,0,0.09559999903043111
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,fp8,0,0.09058666229248047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,float16,0,0.05574933191140493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,float16,0,0.055776000022888184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,fp8,0,0.05170666674772898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,fp8,0,0.05235733091831207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,fp8,0,0.16660799582799277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,fp8,0,0.052015999952952065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,fp8,0,0.05213866631189982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,float16,0,0.0539626677831014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,float16,0,0.05398933092753092
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,fp8,0,0.16843199729919434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,float16,0,0.031471999982992806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,fp8,0,0.0533493310213089
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,fp8,0,0.030432000756263733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,float16,0,0.031248000760873158
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,fp8,0,0.029525332152843475
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,float16,0,0.03146666785081228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,fp8,0,0.02943466603755951
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,float16,0,0.030165334542592365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,fp8,0,0.029493334392706554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,float16,0,0.023050665855407715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,float16,0,0.02306666721900304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,fp8,0,0.021375998854637146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,fp8,0,0.09141332904497783
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,float16,0,0.02309866746266683
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,fp8,0,0.02130666623512904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,float16,0,0.022709332406520844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,fp8,0,0.02237333357334137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,fp8,0,0.021312000850836437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,float16,0,0.022976001103719074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,float16,0,0.01523200049996376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,float16,0,0.015050667027632395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,float16,0,0.054005334774653115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,fp8,0,0.014954666296641031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,float16,0,0.01522133375207583
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,float16,0,0.015141333142916361
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,fp8,0,0.015146666516860327
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,float16,0,0.014778666198253632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,float16,0,0.031311998764673867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,float16,0,0.014997333288192749
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,float16,0,0.014752000570297241
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,float16,0,0.014848000059525171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,fp8,0,0.014805333067973455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,fp8,0,0.02176533391078313
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,float16,0,0.014858666807413101
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,fp8,0,0.014858666807413101
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,float16,0,0.014959999670584997
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,fp8,0,0.014720000326633453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,float16,0,0.01471466695268949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,float16,0,0.014901333798964819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,fp8,0,0.01469333345691363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,float16,0,0.014848000059525171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,fp8,0,0.014783999572197596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,fp8,0,0.014826666563749313
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,fp8,0,0.013845333208640417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,float16,0,0.01498666654030482
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,fp8,0,0.01479999969402949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,fp8,0,0.030181333422660828
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,fp8,0,0.014767999450365702
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,float16,0,0.20349866151809692
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,float16,0,0.20314133167266846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,fp8,0,0.19953600565592447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,fp8,0,0.19938133160273233
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,float16,0,0.20306666692097983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,fp8,0,0.19985600312550864
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,float16,0,0.01470400020480156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,float16,0,0.20348266760508218
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,float16,0,0.10665067036946614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,fp8,0,0.1993866761525472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,fp8,0,0.10337066650390625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,float16,0,0.10758933424949646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,float16,0,0.10921600461006165
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,fp8,0,0.10469866792360942
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,fp8,0,0.1055466632048289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,float16,0,0.10689600308736165
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,float16,0,0.06006933252016703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,fp8,0,0.05794133245944977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,float16,0,0.06223999957243601
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,fp8,0,0.05752533177534739
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,float16,0,0.06076799829800924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,fp8,0,0.10458667079607646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,fp8,0,0.05788800120353699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,fp8,0,0.10453866918881734
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,float16,0,0.10727999607721965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,fp8,0,0.05809600154558817
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,float16,0,0.060229331254959106
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,fp8,0,0.05817066629727682
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,float16,0,0.03532266616821289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,float16,0,0.03521066655715307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,fp8,0,0.0336053321758906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,fp8,0,0.03380800038576126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,fp8,0,0.033589333295822144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,fp8,0,0.03379199902216593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,float16,0,0.03531199942032496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,float16,0,0.02111999938885371
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,fp8,0,0.021002667645613354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,float16,0,0.02130666623512904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,fp8,0,0.020853333175182343
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,float16,0,0.02123733361562093
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,fp8,0,0.021183999876181286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,float16,0,0.0210506667693456
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,fp8,0,0.02086399992307027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,fp8,0,0.021061333517233532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,float16,0,0.02126399924357732
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,float16,0,0.015765332927306492
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,float16,0,0.017743999759356182
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,fp8,0,0.015226667126019796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,float16,0,0.015130666395028433
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,float16,0,0.015226667126019796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,fp8,0,0.015168000012636185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,float16,0,0.010837333897749582
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,float16,0,0.010879999647537867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,fp8,0,0.010911999891201654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,float16,0,0.010698666175206503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,float16,0,0.03543466577927271
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,float16,0,0.03524799893299738
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,float16,0,0.010762666662534079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,float16,0,0.010970667004585266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,fp8,0,0.010805333654085795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,float16,0,0.015674666812022526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,float16,0,0.011029332876205444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,fp8,0,0.010768000036478043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,fp8,0,0.014890667051076889
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,float16,0,0.010784000158309937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,fp8,0,0.010954666882753372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,float16,0,0.010672000547250112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,float16,0,0.010778666784365972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,float16,0,0.01091733326514562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,fp8,0,0.010687999427318573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,float16,0,0.010901333143313726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,fp8,0,0.010672000547250112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,float16,0,0.01062400018175443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,fp8,0,0.010703999549150467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,float16,0,0.010549332946538925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,fp8,0,0.010714666297038397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,float16,0,0.010778666784365972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,fp8,0,0.010714666297038397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,float16,0,0.16089066863059998
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,fp8,0,0.15651200215021768
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,float16,0,0.1628266672293345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,fp8,0,0.15921066204706827
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,float16,0,0.16053332885106406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,fp8,0,0.15634666879971823
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,float16,0,0.061066667238871254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,float16,0,0.08637866377830505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,fp8,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,float16,0,0.08617066343625386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,fp8,0,0.08287466565767924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,fp8,0,0.08338666955629985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,float16,0,0.08921600381533305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,fp8,0,0.08237333099047343
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,float16,0,0.08565333485603333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,fp8,0,0.08433600266774495
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,fp8,0,0.08226666847864787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,float16,0,0.0488319993019104
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,fp8,0,0.04589866598447164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,float16,0,0.04809600114822388
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,float16,0,0.04850666721661886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,fp8,0,0.0458186666170756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,float16,0,0.04794666667779287
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,fp8,0,0.0462773342927297
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,fp8,0,0.04580800235271454
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,float16,0,0.04756799836953481
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,fp8,0,0.045706664522488914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,float16,0,0.16106133659680685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,fp8,0,0.027797333896160126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,float16,0,0.02941333254178365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,fp8,0,0.02740799884001414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,fp8,0,0.02743999908367793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,float16,0,0.02757333219051361
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,float16,0,0.029157333076000214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,fp8,0,0.027109332382678986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,float16,0,0.029029332101345062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,fp8,0,0.027104000250498455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,float16,0,0.018794666975736618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,fp8,0,0.01714133347074191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,float16,0,0.018965333700180054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,float16,0,0.08717333277066548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,float16,0,0.01714133347074191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,fp8,0,0.01692266638080279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,float16,0,0.018687999496857326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,float16,0,0.013525333255529404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,float16,0,0.013546666751305262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,fp8,0,0.013722666849692663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,float16,0,0.014058666924635569
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,fp8,0,0.15638933579126993
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,float16,0,0.029919999341169994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,float16,0,0.013114667187134424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,float16,0,0.010981333752473196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,float16,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,fp8,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,fp8,0,0.010794666906197866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,fp8,0,0.010863999525705973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,float16,0,0.010805333654085795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,float16,0,0.010757333288590113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,fp8,0,0.010821333775917688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,float16,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,fp8,0,0.010794666906197866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,float16,0,0.010538666198650995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,float16,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,fp8,0,0.009626666704813639
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,float16,0,0.01302933320403099
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,fp8,0,0.009103999783595404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,float16,0,0.008767999708652496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,fp8,0,0.009653333574533463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,float16,0,0.010682666053374609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,float16,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,float16,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,float16,0,0.010026666646202406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,float16,0,0.010682666053374609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,float16,0,0.14122666915257773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,fp8,0,0.13587733109792074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,float16,0,0.14220800002415976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,fp8,0,0.13777599732081094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,fp8,0,0.13662933309872946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,float16,0,0.14175466696421304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,fp8,0,0.13589333494504294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,float16,0,0.07648533085982005
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,float16,0,0.07458666463692983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,float16,0,0.018965333700180054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,fp8,0,0.07218133409818013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,fp8,0,0.07236266632874806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,float16,0,0.0767146646976471
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,float16,0,0.07442666590213776
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,fp8,0,0.07089599967002869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,fp8,0,0.07071466743946075
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,float16,0,0.04264000058174133
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,fp8,0,0.039733332892258964
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,fp8,0,0.04146666576464971
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,float16,0,0.041722665230433144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,float16,0,0.04156800111134847
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,fp8,0,0.04168533285458883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,fp8,0,0.041349334021409355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,float16,0,0.041589332123597465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,fp8,0,0.03972266614437103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,float16,0,0.1439786652723948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,float16,0,0.025392000873883564
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,fp8,0,0.025045332809289295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,fp8,0,0.025114665428797405
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,fp8,0,0.02508266766866048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,float16,0,0.027130665878454845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,float16,0,0.027104000250498455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,fp8,0,0.024986666937669117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,float16,0,0.025370667378107708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,fp8,0,0.02502399931351344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,fp8,0,0.016143999993801117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,fp8,0,0.01621333385507266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,float16,0,0.0173333336909612
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,float16,0,0.016837333639462788
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,fp8,0,0.015957333147525787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,float16,0,0.017093333105246227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,fp8,0,0.07247466842333476
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,fp8,0,0.01534933348496755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,float16,0,0.016714667280515034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,float16,0,0.07684266567230225
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,float16,0,0.04342933495839437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,float16,0,0.01268799975514412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,float16,0,0.012757333616415659
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,float16,0,0.012837332983811697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,fp8,0,0.012773333738247553
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,fp8,0,0.012784000486135483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,float16,0,0.02537599951028824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,float16,0,0.010832000523805618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,fp8,0,0.009082666908701261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,float16,0,0.00997866690158844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,float16,0,0.017114666601022083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,float16,0,0.0100853331387043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,float16,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,fp8,0,0.009765333185593287
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,float16,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,fp8,0,0.01682666689157486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,float16,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,float16,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,float16,0,0.008858666444818178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,float16,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,float16,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,float16,0,0.009130666653315226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,float16,0,0.008634666601816813
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,float16,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,float16,0,0.008842666943868002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,float16,0,0.13910399874051413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,fp8,0,0.12814399600028992
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,float16,0,0.0129120002190272
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,float16,0,0.13657599687576294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,fp8,0,0.12776000301043192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,float16,0,0.1378613313039144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,fp8,0,0.12745066483815512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,float16,0,0.13759467005729675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,fp8,0,0.12779733538627625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,float16,0,0.0759626676638921
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,float16,0,0.07420266668001811
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,fp8,0,0.06816533207893372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,fp8,0,0.06765333314736684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,float16,0,0.07561600208282471
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,float16,0,0.009045333291093508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,float16,0,0.0724480003118515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,fp8,0,0.07019733389218648
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,float16,0,0.07316799958546956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,fp8,0,0.06806399921576183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,float16,0,0.043562665581703186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,fp8,0,0.03881600002447764
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,float16,0,0.041663999358812966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,float16,0,0.04387733340263367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,fp8,0,0.03948266555865606
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,float16,0,0.0414986660083135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,fp8,0,0.03770666569471359
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,float16,0,0.04153066625197729
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,float16,0,0.025055999557177227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,fp8,0,0.023999998966852825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,float16,0,0.025279998779296875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,fp8,0,0.0230880007147789
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,float16,0,0.025274666647116344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,fp8,0,0.023237332701683044
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,float16,0,0.025120000044504803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,fp8,0,0.023728000621000927
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,float16,0,0.016890666137139004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,fp8,0,0.014778666198253632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,float16,0,0.01685333376129468
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,fp8,0,0.06637333333492279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,float16,0,0.01681600014368693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,fp8,0,0.015216000378131866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,fp8,0,0.0376800000667572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,fp8,0,0.015226667126019796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,fp8,0,0.03975466638803482
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,float16,0,0.025248001019159954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,float16,0,0.012741333494583765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,fp8,0,0.012730666746695837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,fp8,0,0.023061332603295643
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,float16,0,0.012725333372751871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,float16,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,float16,0,0.016837333639462788
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,fp8,0,0.009109333157539368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,float16,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,fp8,0,0.00877333308259646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,float16,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,float16,0,0.016927999754746754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,float16,0,0.008687999720374743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,fp8,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,float16,0,0.013066666821638743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,fp8,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,float16,0,0.008837333569924036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,float16,0,0.012794667234023413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,fp8,0,0.008618666479984919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,float16,0,0.00997866690158844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,float16,0,0.012847999731699625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,fp8,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,float16,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,float16,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,1,128,1,float16,float16,0,0.012917333592971167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,float16,0,0.009296000003814697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,1,128,1,float16,fp8,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,0,0.015061333775520325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,float16,0,0.008874666566650072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,0,0.016778666526079178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,0,0.01482133318980535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,0,0.027215999861558277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,0,0.02332266668478648
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,float16,0,0.0409706657131513
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,1,128,1,float16,float16,0,0.009098666409651438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,1,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,float16,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,fp8,0,0.010288000106811523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,0,0.010650667051474253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,0,0.012730666746695837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,0,0.01098666712641716
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,0,0.016890666137139004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,1,128,1,float16,float16,0,0.008837333569924036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,0,0.014688000082969666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,fp8,0,0.019199999670187633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,float16,0,0.02096533278624217
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,0,0.008538666491707167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,0,0.011749333391586939
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,0,0.010762666662534079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,0,0.007285333548982938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,1,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,0,0.007178666690985362
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,0,0.00874133345981439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,0,0.007680000116427739
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,0,0.009125333279371262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,1,128,1,float16,float16,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,float16,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,1,128,1,float16,fp8,0,0.008645333349704742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,fp8,0,0.03142400085926056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,0,0.0069973332186539965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,0,0.006911999856432279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,0,0.006895999734600385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,float16,0,0.009088000282645226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,fp8,0,0.008767999708652496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,24,1,128,1,float16,fp8,0,0.008709333216150602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,1,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,1,128,1,float16,float16,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,0,0.006895999734600385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,0,0.006831999868154526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,0,0.0069973332186539965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,0,0.006815999746322632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,float16,0,0.014789332946141561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,0,0.00666133314371109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,1,128,1,float16,float16,0,0.008656000097592672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,float16,0,0.006890666360656421
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,0,0.00697066696981589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,0,0.0069866664707660675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,0,0.00679466687142849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,0,0.006618666773041089
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,0,0.006581333155433337
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,0,0.006640000268816948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,float16,0,0.0069333333522081375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,fp8,0,0.006800000245372455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,1,128,1,float16,float16,0,0.008645333349704742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,0,0.006682666639486949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,0,0.007007999966541926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,0,0.0069440001000960665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,0,0.00666133314371109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,0,0.006522666662931442
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,0,0.006629333520929019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,fp8,0,0.006655999769767125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,float16,0,0.006682666639486949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,fp8,0,0.006688000013430913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,1,128,1,float16,float16,0,0.008714666590094566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,1,128,1,float16,fp8,0,0.00850133349498113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,0,0.006895999734600385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,0,0.006874666859706243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,0,0.006751999879876773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,0,0.006730666384100914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,0,0.00679466687142849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,0,0.00690133310854435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,1,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,0,0.006682666639486949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,float16,0,5.482751846313477
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,fp8,0,5.365013122558594
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,float16,0,6.05354118347168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,fp8,0,5.047834714253743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,0,0.006490666419267654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,float16,0,5.489141464233398
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,fp8,0,5.043999989827474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,float16,0,2.9079360961914062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,fp8,0,2.5859519640604653
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,float16,0,3.120762825012207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,float16,0,5.4880320231119795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,fp8,0,5.365354537963867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,24,1,128,1,float16,fp8,0,0.008618666479984919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,fp8,0,2.7440268198649087
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,float16,0,2.9815359115600586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,float16,0,2.8112265268961587
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,fp8,0,2.914426803588867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,float16,0,1.4710079828898113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,fp8,0,1.4356907208760579
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,float16,0,2.8096211751302085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,float16,0,1.7935412724812825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,fp8,0,1.4368960062662761
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,float16,0,1.759424050649007
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,fp8,0,1.4344959259033203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,fp8,0,1.432096004486084
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,float16,0,1.6175947189331055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,fp8,0,1.3678879737854004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,float16,0,0.8014933268229166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,fp8,0,0.7336533069610596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,float16,0,0.9584533373514811
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,fp8,0,0.7796586354573568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,float16,0,0.8489440282185873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,fp8,0,0.7769066492716471
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,fp8,0,0.780741294225057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,fp8,0,2.758058547973633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,float16,0,0.8009706338246664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,fp8,0,0.7332373460133871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,fp8,0,2.74674129486084
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,float16,0,3.249850591023763
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,float16,0,1.7864747047424316
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,fp8,0,3.1097278594970703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,fp8,0,3.1083361307779946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,float16,0,3.6739412943522134
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,float16,0,3.868687947591146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,fp8,0,3.0934133529663086
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,float16,0,1.6377973556518555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,fp8,0,1.5029600461324055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,float16,0,0.9435413678487142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,float16,0,2.0083893140157065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,fp8,0,3.1135679880777993
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,float16,0,3.371429443359375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,fp8,0,2.0695093472798667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,float16,0,1.7449013392130535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,fp8,0,1.502069314320882
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,float16,0,1.633237361907959
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,fp8,0,1.601194699605306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,float16,0,0.9250986576080322
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,float16,0,1.7799679438273113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,fp8,0,1.603333314259847
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,fp8,0,0.8497599760691324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,fp8,0,0.8422079881032308
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,fp8,0,0.846560001373291
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,float16,0,1.0326666831970215
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,float16,0,0.9535573323567709
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,fp8,0,0.8830826282501221
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,float16,0,0.5516000191370646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,fp8,0,0.4442773262659709
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,float16,0,0.4957066774368286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,fp8,0,0.4731146494547526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,float16,0,0.559066653251648
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,fp8,0,0.4723680019378662
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,float16,0,0.4843519926071167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,fp8,0,0.47865601380666095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,fp8,0,0.48015467325846356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,float16,0,0.5514933268229166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,fp8,0,2.0682934125264487
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,fp8,0,2.221855958302816
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,float16,0,2.6941652297973633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,float16,0,2.633141358693441
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,float16,0,1.0384480158487956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,fp8,0,2.1055466334025064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,float16,0,1.4636106491088867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,float16,0,2.246410687764486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,fp8,0,0.8500906626383463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,float16,0,1.2461333274841309
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,fp8,0,1.145466645558675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,fp8,0,2.5697919527689614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,fp8,0,1.0722880363464355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,fp8,0,1.260090668996175
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,float16,0,1.4234239260355632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,float16,0,1.1696373621622722
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,fp8,0,1.1477973461151123
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,float16,0,1.2337493101755779
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,float16,0,0.7429013252258301
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,fp8,0,0.6142559846242269
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,fp8,0,0.575984001159668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,float16,0,0.6973333358764648
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,float16,0,0.7305066585540771
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,fp8,0,0.6133386691411337
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,float16,0,0.6322453419367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,fp8,0,0.5744906663894653
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,float16,0,0.6364959875742594
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,fp8,0,0.6198506752649943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,float16,0,0.39962132771809894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,float16,0,0.40109864870707196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,fp8,0,0.32946133613586426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,fp8,0,0.3553813298543294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,float16,0,0.3977866570154826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,fp8,0,0.3475253184636434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,fp8,0,0.34942400455474854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,float16,0,0.3914506832758586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,fp8,0,0.34995734691619873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,float16,0,0.8669973214467367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,fp8,0,2.815685272216797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,float16,0,3.5363572438557944
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,fp8,0,2.667509396870931
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,float16,0,3.422853469848633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,float16,0,2.24345064163208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,float16,0,3.3425865173339844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,float16,0,1.6565440495808919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,fp8,0,1.3670986493428547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,float16,0,1.8015146255493164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,fp8,0,2.66650660832723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,float16,0,3.126079877217611
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,float16,0,0.36346133550008136
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,fp8,0,1.419856071472168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,float16,0,1.4835573832194011
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,fp8,0,1.363573392232259
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,float16,0,1.830901304880778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,fp8,0,1.3635999361673992
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,fp8,0,1.457248051961263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,float16,0,0.8278293609619141
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,fp8,0,1.1457599798838298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,fp8,0,0.7717280387878418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,fp8,0,0.7645920117696127
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,fp8,0,0.7139146327972412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,float16,0,0.9393386840820312
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,fp8,0,0.7628160317738851
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,float16,0,0.7794720331827799
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,fp8,0,2.855365435282389
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,fp8,0,0.7694986661275228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,float16,0,0.493285338083903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,float16,0,0.42532265186309814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,fp8,0,0.41872533162434894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,fp8,0,0.41735998789469403
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,float16,0,0.4782880147298177
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,fp8,0,0.4175306558609009
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,fp8,0,0.4191146691640218
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,float16,0,0.48123733202616376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,float16,0,0.49513598283131915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,fp8,0,0.4170080025990804
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,float16,0,0.2770773371060689
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,float16,0,0.25088000297546387
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,fp8,0,0.22638932863871256
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,fp8,0,0.24490133921305338
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,float16,0,0.277839998404185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,float16,0,1.4946719805399578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,float16,0,0.2754080096880595
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,float16,0,0.2794826626777649
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,fp8,0,0.242741326491038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,float16,0,0.7778186798095703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,fp8,0,1.5666186014811199
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,float16,0,1.8160640398661296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,fp8,0,1.6760533650716145
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,float16,0,1.6968906720479329
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,fp8,0,1.6773014068603516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,fp8,0,1.6772373517354329
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,float16,0,2.0458720525105796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,float16,0,0.8816373348236084
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,fp8,0,0.8097546895345052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,fp8,0,0.8650026321411133
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,float16,0,1.0512746969858806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,fp8,0,0.24401599168777466
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,float16,0,1.07042129834493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,fp8,0,0.8626879851023356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,fp8,0,0.8640267054239908
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,fp8,0,0.864192008972168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,float16,0,0.9385173320770264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,float16,0,1.6934827168782551
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,float16,0,0.46991999944051105
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,fp8,0,0.430400013923645
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,float16,0,0.5509066581726074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,fp8,0,0.4628213246663411
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,float16,0,0.5524799823760986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,fp8,0,0.4596906503041585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,float16,0,0.5436373154322306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,fp8,0,0.4596533377965291
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,float16,0,0.46724800268809
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,fp8,0,0.45853865146636963
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,float16,0,0.2881653308868408
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,fp8,0,0.25806933641433716
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,float16,0,0.2640586694081624
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,fp8,0,0.2604479988416036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,float16,0,0.2987626592318217
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,fp8,0,0.23906666040420532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,float16,0,0.2959466576576233
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,fp8,0,0.2590399980545044
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,fp8,0,0.25830399990081787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,float16,0,0.26315200328826904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,float16,0,0.17382399241129556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,float16,0,0.15945067008336386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,fp8,0,0.24405866861343384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,fp8,0,0.14628799756368002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,fp8,0,0.15624533096949259
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,float16,0,0.7782346407572428
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,float16,0,0.15679466724395752
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,fp8,0,0.1552853286266327
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,fp8,0,0.15517333149909973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,float16,0,0.1575146714846293
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,float16,0,1.0641547044118245
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,float16,0,1.7606132825215657
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,float16,0,1.5910293261210124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,fp8,0,1.5711733500162761
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,float16,0,1.5952213605244954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,fp8,0,1.575994650522868
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,float16,0,0.8305546442667643
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,float16,0,0.9863359928131104
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,fp8,0,0.8084373474121094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,float16,0,1.6031306584676106
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,fp8,0,1.475920041402181
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,fp8,0,0.8067626953125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,float16,0,0.975541353225708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,float16,0,0.8180800278981527
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,float16,0,0.16058666507403055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,fp8,0,0.8072746594746908
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,fp8,0,0.7565493583679199
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,float16,0,0.8217493693033854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,fp8,0,0.39823468526204425
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,float16,0,0.5125279823939005
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,fp8,0,0.4243306716283162
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,fp8,0,1.4718613624572754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,fp8,0,0.4244053363800049
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,float16,0,0.43188798427581787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,fp8,0,0.42364267508188885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,fp8,0,0.42524266242980957
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,float16,0,0.2402613361676534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,fp8,0,0.2190773288408915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,float16,0,0.23599465688069662
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,fp8,0,0.8060692946116129
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,fp8,0,0.15441067020098367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,fp8,0,0.2328266700108846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,fp8,0,0.23317333062489828
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,float16,0,0.26714666684468585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,fp8,0,0.2185279925664266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,float16,0,0.4354613224665324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,float16,0,0.23720000187555948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,float16,0,0.14216533303260803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,float16,0,0.15542399883270264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,fp8,0,0.12774933377901712
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,fp8,0,0.13731732964515686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,float16,0,0.49690667788187665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,float16,0,0.15435733397801718
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,fp8,0,0.12582932909329733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,float16,0,0.1400213340918223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,fp8,0,0.1364479959011078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,fp8,0,0.12607999642690024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,float16,0,0.1002400020758311
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,float16,0,0.0911253293355306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,fp8,0,0.08275733391443889
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,fp8,0,0.08275733391443889
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,float16,0,0.09910399715105693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,fp8,0,0.08273600041866302
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,float16,0,0.4603360096613566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,float16,0,0.09084266424179077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,fp8,0,0.08277866741021474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,float16,0,0.09020800391832988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,fp8,0,0.08225066463152568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,float16,0,0.9590613047281901
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,fp8,0,0.8864479859670004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,float16,0,1.1798880100250244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,float16,0,0.23626132806142172
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,fp8,0,0.9464106559753418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,fp8,0,0.9465226332346598
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,fp8,0,0.2355733315149943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,float16,0,0.5045280059178671
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,fp8,0,0.4631733496983846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,float16,0,1.042197306950887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,float16,0,0.5950453281402588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,fp8,0,0.4942613442738851
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,fp8,0,0.4894293149312337
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,float16,0,0.527786652247111
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,float16,0,0.14044800400733948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,fp8,0,0.4896426598230998
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,fp8,0,0.49053335189819336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,float16,0,0.49850666522979736
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,float16,0,0.3059200048446655
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,float16,0,0.26981866359710693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,fp8,0,0.24654932816823324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,fp8,0,0.26264532407124835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,fp8,0,0.2616906762123108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,float16,0,0.2653866608937581
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,fp8,0,0.24723732471466064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,float16,0,0.27082665761311847
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,fp8,0,0.24516799052556357
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,float16,0,0.15330132842063904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,float16,0,0.9609440167744955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,float16,0,0.16769067446390787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,fp8,0,0.14756799737612405
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,fp8,0,0.13871999581654867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,float16,0,0.16940800348917642
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,fp8,0,0.1472640037536621
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,float16,0,0.16524799664815268
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,fp8,0,0.1363146702448527
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,fp8,0,0.14684800306955972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,float16,0,0.15121066570281982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,float16,0,0.09292266766230266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,float16,0,0.09403733412424724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,fp8,0,0.08281066517035167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,fp8,0,0.947429339090983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,fp8,0,0.0830080012480418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,float16,0,0.09116799632708232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,fp8,0,0.08426133791605632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,float16,0,0.5952533483505249
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,fp8,0,0.08263466755549113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,float16,0,0.06676800052324931
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,fp8,0,0.05598400036493937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,float16,0,0.061162665486335754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,fp8,0,0.0557226687669754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,float16,0,0.06669866542021434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,fp8,0,0.05593066910902659
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,float16,0,0.060880000392595925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,fp8,0,0.05605866511662801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,float16,0,0.06688533226648967
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,fp8,0,0.056032001972198486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,float16,0,0.3023306727409363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,float16,0,0.9412799676259359
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,fp8,0,0.9261439641316732
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,float16,0,0.9422132968902588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,fp8,0,0.9258453051249186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,float16,0,0.9452213446299235
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,fp8,0,0.8762826919555664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,fp8,0,0.08923199772834778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,float16,0,0.09965866804122925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,float16,0,0.09914132952690125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,float16,0,0.4949546655019124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,fp8,0,0.45556267102559406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,float16,0,0.5140533447265625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,fp8,0,0.4750346740086873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,fp8,0,0.8785653114318848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,float16,0,0.5065866708755493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,float16,0,0.5194933414459229
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,fp8,0,0.44948267936706543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,fp8,0,0.4493866761525472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,fp8,0,0.47643200556437176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,float16,0,0.48817066351572674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,fp8,0,0.24757333596547446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,fp8,0,0.2540586590766907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,float16,0,0.25598933299382526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,float16,0,0.27745066086451214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,float16,0,0.26149866978327435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,fp8,0,0.25380265712738037
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,float16,0,0.2621386647224426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,fp8,0,0.2526080012321472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,float16,0,0.26313600937525433
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,fp8,0,0.25196266174316406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,float16,0,0.1450933317343394
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,fp8,0,0.12845333417256674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,fp8,0,0.13218667109807333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,float16,0,0.14513066411018372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,fp8,0,0.1383573313554128
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,float16,0,0.15876266360282898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,fp8,0,0.1386560002962748
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,fp8,0,0.13902933398882547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,float16,0,0.09089600046475728
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,fp8,0,0.07645866771539052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,float16,0,0.09117333094278972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,fp8,0,0.07646400233109792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,fp8,0,0.0817440003156662
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,float16,0,0.08340266346931458
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,float16,0,0.08470400174458821
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,fp8,0,0.08144533137480418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,float16,0,0.95033065478007
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,float16,0,0.05468266705671946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,fp8,0,0.049973333875338234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,fp8,0,0.048245335618654885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,float16,0,0.056048000852266945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,fp8,0,0.05267733335494995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,float16,0,0.054058666030565895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,float16,0,0.05402133365472158
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,fp8,0,0.049253334601720176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,float16,0,0.03386666625738144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,float16,0,0.1588106652100881
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,float16,0,0.03724266588687897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,fp8,0,0.033573334415753685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,float16,0,0.033600000043710075
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,fp8,0,0.033514666060606636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,float16,0,0.14275200168291727
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,float16,0,0.035690667728583016
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,fp8,0,0.03333866596221924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,float16,0,0.08669867118199666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,fp8,0,0.03350399931271871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,fp8,0,0.08242666721343994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,float16,0,0.6917866865793864
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,float16,0,0.056143999099731445
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,fp8,0,0.5494453509648641
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,float16,0,0.6138720115025839
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,fp8,0,0.5772213141123453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,fp8,0,0.05167999863624573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,float16,0,0.5924906730651855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,fp8,0,0.5505280097325643
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,float16,0,0.5965760151545206
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,fp8,0,0.30131200949350995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,fp8,0,0.5750613212585449
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,float16,0,0.3060266574223836
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,fp8,0,0.2844746708869934
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,fp8,0,0.031397332747777305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,float16,0,0.3134506742159526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,float16,0,0.3068693280220032
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,float16,0,0.32602133353551227
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,fp8,0,0.28518933057785034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,fp8,0,0.30259732405344647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,fp8,0,0.286080002784729
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,float16,0,0.3102346658706665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,float16,0,0.16972267627716064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,float16,0,0.18323200941085815
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,fp8,0,0.151829332113266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,float16,0,0.17045867443084717
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,float16,0,0.16567466656366983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,fp8,0,0.15198933084805807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,fp8,0,0.16361600160598755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,fp8,0,0.15507200360298157
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,float16,0,0.1665600041548411
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,fp8,0,0.15356266498565674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,float16,0,0.09318400422732036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,fp8,0,0.088319996992747
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,float16,0,0.09645332892735799
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,fp8,0,0.09121066331863403
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,fp8,0,0.08473599950472514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,float16,0,0.09458667039871216
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,fp8,0,0.08639466762542725
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,float16,0,0.05835733314355215
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,fp8,0,0.051674668987592064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,fp8,0,0.0521066685517629
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,float16,0,0.05686399837334951
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,fp8,0,0.054058666030565895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,float16,0,0.05977066854635874
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,fp8,0,0.054671997825304665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,float16,0,0.060138667623202004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,fp8,0,0.054805333415667214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,float16,0,0.0413973331451416
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,fp8,0,0.03738666574160258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,float16,0,0.03754133234421412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,fp8,0,0.03742400060097376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,float16,0,0.037445334096749626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,fp8,0,0.036677333215872444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,float16,0,0.03338133295377096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,fp8,0,0.03745600084463755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,float16,0,0.03772266705830892
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,fp8,0,0.09103467067082723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,float16,0,0.09496000409126282
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,fp8,0,0.037402667105197906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,float16,0,0.09393067161242168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,float16,0,0.02743999908367793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,fp8,0,0.02712533374627431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,fp8,0,0.025461333493391674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,float16,0,0.027221334477265675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,fp8,0,0.02513066679239273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,float16,0,0.02922666569550832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,fp8,0,0.025226667523384094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,fp8,0,0.027024000883102417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,float16,0,0.05653866628805796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,float16,0,0.02737066646416982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,float16,0,0.6166133483250936
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,fp8,0,0.6003359953562418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,float16,0,0.6357066631317139
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,fp8,0,0.5945066610972086
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,float16,0,0.6200639804204305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,fp8,0,0.5774186849594116
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,float16,0,0.6246453523635864
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,fp8,0,0.60207466284434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,float16,0,0.32504000266393024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,float16,0,0.03775466730197271
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,float16,0,0.027061333258946735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,fp8,0,0.310810665289561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,float16,0,0.3565280040105184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,fp8,0,0.2996586759885152
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,float16,0,0.3328373432159424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,float16,0,0.32156266768773395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,fp8,0,0.2977866729100545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,fp8,0,0.30993600686391193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,float16,0,0.17278399070103964
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,float16,0,0.35865600903828937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,fp8,0,0.16666666666666666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,fp8,0,0.1567520002524058
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,float16,0,0.18475200732549033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,float16,0,0.175546665986379
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,fp8,0,0.16352533300717673
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,float16,0,0.1865760087966919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,float16,0,0.1714293360710144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,fp8,0,0.15826666355133057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,float16,0,0.09563733140627544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,float16,0,0.1034986674785614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,fp8,0,0.09028266867001851
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,fp8,0,0.0918239951133728
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,float16,0,0.09860266248385112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,fp8,0,0.09095999598503113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,fp8,0,0.08681066830952962
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,float16,0,0.060191998879114784
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,float16,0,0.06006399790445963
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,fp8,0,0.053264002005259194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,float16,0,0.06016000111897787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,fp8,0,0.052853330969810486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,fp8,0,0.15876799821853638
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,float16,0,0.057802667220433555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,fp8,0,0.053413331508636475
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,fp8,0,0.31152000029881793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,fp8,0,0.05382933219273885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,float16,0,0.03832533210515976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,fp8,0,0.03359466542800268
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,fp8,0,0.03419733295838038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,float16,0,0.03866666555404663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,fp8,0,0.033626665671666466
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,float16,0,0.03743999948104223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,fp8,0,0.03350399931271871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,fp8,0,0.08713600039482117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,fp8,0,0.034458667039871216
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,float16,0,0.09566932916641235
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,float16,0,0.023354666928450268
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,float16,0,0.09523199995358785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,fp8,0,0.021370666722456615
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,float16,0,0.02459733436505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,fp8,0,0.053360000252723694
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,float16,0,0.02327999969323476
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,fp8,0,0.021290667355060577
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,float16,0,0.025253333151340485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,float16,0,0.05599466462930044
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,fp8,0,0.021130666136741638
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,float16,0,0.03522666543722153
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,fp8,0,0.020986666282018025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,float16,0,0.02125866711139679
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,fp8,0,0.021317332983016968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,float16,0,0.02332799881696701
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,fp8,0,0.020975999534130096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,float16,0,0.02293333411216736
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,fp8,0,0.021007999777793884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,float16,0,0.03562666724125544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,fp8,0,0.020917333662509918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,float16,0,0.02128000060717265
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,float16,0,0.0230880007147789
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,fp8,0,0.021290667355060577
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,float16,0,0.4558773438135783
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,fp8,0,0.43355735143025714
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,float16,0,0.475328008333842
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,fp8,0,0.43558398882548016
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,float16,0,0.021013334393501282
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,float16,0,0.45974934101104736
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,fp8,0,0.42504000663757324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,float16,0,0.2403786579767863
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,fp8,0,0.22192533810933432
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,fp8,0,0.02083733429511388
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,fp8,0,0.43718934059143066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,float16,0,0.4615039825439453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,float16,0,0.2516106764475505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,float16,0,0.23592533667882284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,fp8,0,0.22734399636586508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,fp8,0,0.21949867407480875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,float16,0,0.23690666755040488
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,fp8,0,0.22750399510065714
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,fp8,0,0.11589866876602173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,float16,0,0.12636267145474753
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,float16,0,0.12844799955685934
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,fp8,0,0.11755733688672383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,float16,0,0.12607999642690024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,float16,0,0.1358506679534912
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,fp8,0,0.11552000045776367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,fp8,0,0.11585600177447002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,float16,0,0.2386080026626587
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,float16,0,0.12689066926638284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,fp8,0,0.21997332572937012
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,fp8,0,0.06433066725730896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,float16,0,0.07212799787521362
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,float16,0,0.0721973329782486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,fp8,0,0.06422933439413707
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,fp8,0,0.06392533580462138
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,float16,0,0.07038400073846181
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,fp8,0,0.06653333206971486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,float16,0,0.043466667334238686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,float16,0,0.07652799785137177
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,fp8,0,0.039103999733924866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,float16,0,0.041877334316571556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,float16,0,0.042266666889190674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,fp8,0,0.03902400036652883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,float16,0,0.040949332217375435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,float16,0,0.04144533226887385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,fp8,0,0.03808533400297165
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,float16,0,0.026799999177455902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,float16,0,0.027690666417280834
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,fp8,0,0.023984000086784363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,fp8,0,0.023311999936898548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,float16,0,0.027376001079877216
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,fp8,0,0.025146665672461193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,float16,0,0.025386666258176167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,fp8,0,0.025034666061401367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,fp8,0,0.0662666658560435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,float16,0,0.026799999177455902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,fp8,0,0.0232640008131663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,fp8,0,0.016917333006858826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,float16,0,0.01720533271630605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,float16,0,0.016858667135238647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,fp8,0,0.016805333395799
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,float16,0,0.016906666258970898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,fp8,0,0.016906666258970898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,fp8,0,0.016821333517630894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,float16,0,0.017136000096797943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,fp8,0,0.03723733375469843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,fp8,0,0.014848000059525171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,fp8,0,0.0369759996732076
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,float16,0,0.01803733284274737
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,fp8,0,0.014682666709025701
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,float16,0,0.016869333883126576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,fp8,0,0.01469333345691363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,float16,0,0.01682666689157486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,fp8,0,0.014752000570297241
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,fp8,0,0.11580800016721089
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,float16,0,0.015263999501864115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,fp8,0,0.014767999450365702
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,float16,0,0.015050667027632395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,float16,0,0.016970666746298473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,fp8,0,0.014778666198253632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,float16,0,0.014885333677132925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,float16,0,0.01492799942692121
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,fp8,0,0.014805333067973455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,float16,0,0.07619733115037282
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,float16,0,0.016879999389251072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,float16,0,0.18704533576965332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,fp8,0,0.18061333894729614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,float16,0,0.1887893279393514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,fp8,0,0.17989333470662436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,float16,0,0.1870186726252238
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,float16,0,0.01626666635274887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,fp8,0,0.18042133251825967
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,fp8,0,0.0955466628074646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,float16,0,0.10513599713643391
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,float16,0,0.1881600022315979
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,float16,0,0.016783999900023144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,fp8,0,0.0956106682618459
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,fp8,0,0.09500267108281453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,float16,0,0.10553066929181416
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,fp8,0,0.17522132396697998
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,float16,0,0.10099200407663982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,float16,0,0.09948800007502238
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,fp8,0,0.0925600032011668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,float16,0,0.05680533250172933
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,fp8,0,0.09366400043169658
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,fp8,0,0.015274666249752045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,fp8,0,0.04965866605440775
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,float16,0,0.10219200452168782
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,fp8,0,0.05186133086681366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,float16,0,0.05571199953556061
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,fp8,0,0.051258668303489685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,float16,0,0.0580266664425532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,float16,0,0.03530666728814443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,fp8,0,0.03125333289305369
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,float16,0,0.03498133271932602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,float16,0,0.033333333830038704
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,fp8,0,0.03125333289305369
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,fp8,0,0.03108799954255422
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,fp8,0,0.05198933184146881
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,float16,0,0.03316266586383184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,fp8,0,0.031317333380381264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,fp8,0,0.029845332105954487
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,float16,0,0.03365333378314972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,fp8,0,0.019018666197856266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,float16,0,0.020954666038354237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,float16,0,0.02117866774400075
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,fp8,0,0.019205333044131596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,float16,0,0.02102400114138921
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,fp8,0,0.019002666076024372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,float16,0,0.020629333953062694
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,fp8,0,0.018858666221300762
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,float16,0,0.013376000026861826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,float16,0,0.014730667074521383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,fp8,0,0.013157332936922709
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,fp8,0,0.051589335004488625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,float16,0,0.05713599920272827
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,float16,0,0.014778666198253632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,float16,0,0.014762666076421738
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,float16,0,0.012863999853531519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,float16,0,0.012784000486135483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,float16,0,0.013050666699806849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,float16,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,float16,0,0.012773333738247553
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,float16,0,0.012730666746695837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,fp8,0,0.012725333372751871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,float16,0,0.012773333738247553
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,float16,0,0.02086399992307027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,float16,0,0.013722666849692663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,float16,0,0.012703999876976013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,fp8,0,0.012682666381200155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,float16,0,0.013023999830087027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,float16,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,float16,0,0.012794667234023413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,float16,0,0.05749333401521047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,float16,0,0.01479999969402949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,float16,0,0.012752000242471695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,fp8,0,0.013962666193644205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,float16,0,0.012831999609867731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,fp8,0,0.012714666624863943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,float16,0,0.12609066565831503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,fp8,0,0.11542399724324544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,float16,0,0.12804800271987915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,fp8,0,0.1181653340657552
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,float16,0,0.12363732854525249
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,fp8,0,0.11718933780988057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,float16,0,0.012805332740147909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,float16,0,0.12494933605194092
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,float16,0,0.06823466718196869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,fp8,0,0.11567999919255574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,fp8,0,0.062224000692367554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,fp8,0,0.06188266475995382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,float16,0,0.06832000116507213
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,float16,0,0.06664533416430156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,fp8,0,0.062128002444903054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,float16,0,0.06668266654014587
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,fp8,0,0.06214400132497152
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,float16,0,0.037791999677817024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,float16,0,0.03702399879693985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,fp8,0,0.03456533451875051
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,float16,0,0.03755733370780945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,fp8,0,0.033285332222779594
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,float16,0,0.03730666637420654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,fp8,0,0.03338133295377096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,fp8,0,0.06295999884605408
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,float16,0,0.037621334195137024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,float16,0,0.023205332458019257
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,fp8,0,0.02218666672706604
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,float16,0,0.02329600105683009
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,float16,0,0.02314666658639908
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,fp8,0,0.021669333179791767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,float16,0,0.02309333284695943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,fp8,0,0.0220266655087471
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,float16,0,0.023077333966890972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,fp8,0,0.02269333352645238
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,float16,0,0.01522133375207583
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,float16,0,0.015061333775520325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,fp8,0,0.01482133318980535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,fp8,0,0.034634667138258614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,fp8,0,0.01488000030318896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,fp8,0,0.014848000059525171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,float16,0,0.015077333897352219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,float16,0,0.015103999525308609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,float16,0,0.010944000134865442
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,fp8,0,0.014848000059525171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,float16,0,0.011050666371981302
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,fp8,0,0.010656000425418219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,float16,0,0.010928000013033548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,float16,0,0.0682773341735204
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,float16,0,0.010666667173306147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,fp8,0,0.010832000523805618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,fp8,0,0.03338133295377096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,float16,0,0.010890666395425797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,float16,0,0.010832000523805618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,fp8,0,0.021312000850836437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,fp8,0,0.010794666906197866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,float16,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,fp8,0,0.010853332777818045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,fp8,0,0.01080000028014183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,fp8,0,0.010431999961535135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,float16,0,0.010944000134865442
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,float16,0,0.01062400018175443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,float16,0,0.010703999549150467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,float16,0,0.010666667173306147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,float16,0,0.010703999549150467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,fp8,0,0.009637333452701569
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,float16,0,0.015125333021084467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,float16,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,float16,0,0.010645333677530289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,float16,0,0.011823999385039011
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,float16,0,0.010757333288590113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,float16,0,0.010970667004585266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,float16,0,0.10847999652226765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,fp8,0,0.10077866911888123
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,float16,0,0.00972800018886725
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,float16,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,float16,0,0.1055573324362437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,float16,0,0.010714666297038397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,float16,0,0.10871466994285583
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,fp8,0,0.10075199604034424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,float16,0,0.10663466652234395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,fp8,0,0.10010133186976115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,float16,0,0.05842666824658712
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,float16,0,0.05951466659704844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,fp8,0,0.053717335065205894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,fp8,0,0.053685332338015236
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,float16,0,0.06058666606744131
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,fp8,0,0.053717335065205894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,float16,0,0.05819199979305267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,fp8,0,0.03017599880695343
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,float16,0,0.03338133295377096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,fp8,0,0.029951999584833782
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,float16,0,0.03359466542800268
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,fp8,0,0.02945599953333537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,float16,0,0.03333866596221924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,fp8,0,0.0990826686223348
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,fp8,0,0.029477333029111225
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,float16,0,0.03321066747109095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,fp8,0,0.029557332396507263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,float16,0,0.02093333254257838
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,float16,0,0.021221332252025604
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,fp8,0,0.019146667172511418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,fp8,0,0.054042667150497437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,fp8,0,0.019253333409627277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,fp8,0,0.018922666708628338
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,float16,0,0.02123733361562093
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,float16,0,0.056133334835370384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,float16,0,0.021029333273569744
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,fp8,0,0.018965333700180054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,float16,0,0.020954666038354237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,fp8,0,0.019152000546455383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,float16,0,0.013359999905029932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,float16,0,0.013088000317414602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,float16,0,0.013466666142145792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,fp8,0,0.012741333494583765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,float16,0,0.01481066644191742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,float16,0,0.03270933280388514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,float16,0,0.013157332936922709
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,fp8,0,0.00956266683836778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,float16,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,float16,0,0.010677333921194077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,float16,0,0.010794666906197866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,float16,0,0.010682666053374609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,fp8,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,float16,0,0.010751999914646149
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,float16,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,float16,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,float16,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,float16,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,fp8,0,0.008799999952316284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,float16,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,fp8,0,0.008778666456540426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,fp8,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,float16,0,0.009061333412925402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,float16,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,fp8,0,0.0521919975678126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,float16,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,fp8,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,float16,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,fp8,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,float16,0,0.008879999940594038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,fp8,0,0.010399999717871347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,float16,0,0.10103999574979146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,float16,0,0.096778670946757
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,fp8,0,0.09089600046475728
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,float16,0,0.09638399879137675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,fp8,0,0.09112000465393066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,float16,0,0.05406933526198069
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,fp8,0,0.09056533376375835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,float16,0,0.05409599840641022
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,fp8,0,0.04790933430194855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,fp8,0,0.04965866605440775
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,float16,0,0.053674668073654175
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,float16,0,0.05382933219273885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,fp8,0,0.04923733572165171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,float16,0,0.0524586687485377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,fp8,0,0.04785599807898203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,float16,0,0.03120533376932144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,float16,0,0.031194667021433514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,fp8,0,0.027376001079877216
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,fp8,0,0.027973333994547527
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,fp8,0,0.09125333031018575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,fp8,0,0.027424000203609467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,float16,0,0.029477333029111225
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,fp8,0,0.028991999725500744
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,float16,0,0.0305226668715477
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,float16,0,0.01911466692884763
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,float16,0,0.01926933353145917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,fp8,0,0.018981333822011948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,fp8,0,0.019023999571800232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,float16,0,0.01923199991385142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,fp8,0,0.019130667050679524
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,float16,0,0.01924266666173935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,float16,0,0.09701866904894511
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,fp8,0,0.018933333456516266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,float16,0,0.01926400015751521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,float16,0,0.012949333836634954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,float16,0,0.01303999995191892
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,fp8,0,0.050000001986821495
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,fp8,0,0.013141332815090815
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,float16,0,0.012853333105643591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,float16,0,0.013093333691358566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,float16,0,0.03124266614516576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,float16,0,0.010746666540702185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,float16,0,0.010885333021481832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,fp8,0,0.009109333157539368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,float16,0,0.010661333799362183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,float16,0,0.010762666662534079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,fp8,0,0.029130667448043823
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,float16,0,0.010634666929642359
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,float16,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,float16,0,0.009103999783595404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,float16,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,float16,0,0.009088000282645226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,fp8,0,0.008656000097592672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,fp8,0,0.018906666586796444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,fp8,0,0.00873066671192646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,float16,0,0.00874133345981439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,float16,0,0.008832000195980072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,fp8,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,float16,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,fp8,0,0.00860799973209699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,float16,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,fp8,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,float16,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,fp8,0,0.008821333448092142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,float16,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,fp8,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,fp8,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,float16,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,fp8,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,float16,0,0.013082666943470636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,float16,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,fp8,0,0.08896533648173015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,float16,0,0.09698133667310078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,float16,0,0.09830400347709656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,fp8,0,0.088837335507075
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,float16,0,0.09784533580144246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,fp8,0,0.08678399523099263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,float16,0,0.09527466694513957
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,float16,0,0.05417066812515259
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,fp8,0,0.08880533774693807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,float16,0,0.05182399849096934
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,fp8,0,0.04788800080617269
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,float16,0,0.051967998345692955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,fp8,0,0.04750399788220724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,float16,0,0.0553706685702006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,fp8,0,0.047637333472569786
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,float16,0,0.031221332649389904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,float16,0,0.031125334401925404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,fp8,0,0.02741333345572154
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,float16,0,0.02941333254178365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,fp8,0,0.027386667827765148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,float16,0,0.029461334149042766
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,fp8,0,0.027450665831565857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,float16,0,0.029493334392706554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,fp8,0,0.02749866743882497
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,float16,0,0.019120000302791595
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,fp8,0,0.01889066646496455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,float16,0,0.019237333287795384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,float16,0,0.019280000279347103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,fp8,0,0.019088000059127808
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,float16,0,0.018986667195955913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,float16,0,0.01913600042462349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,fp8,0,0.01882133384545644
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,fp8,0,0.04794666667779287
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,fp8,0,0.04790399968624115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,float16,0,0.01303999995191892
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,float16,0,0.013045333325862885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,float16,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,fp8,0,0.012650666137536367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,fp8,0,0.012730666746695837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,float16,0,0.012784000486135483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,float16,0,0.012736000120639801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,fp8,0,0.027034667630990345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,float16,0,0.010341333225369453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,fp8,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,float16,0,0.010794666906197866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,float16,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,fp8,0,0.009114666531483332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,float16,0,0.010672000547250112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,float16,0,0.009077333534757296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,float16,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,fp8,0,0.01897066707412402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,fp8,0,0.008858666444818178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,float16,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,fp8,0,0.00879466657837232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,float16,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,float16,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,float16,0,0.053354665637016296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,float16,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,float16,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,float16,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,float16,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,float16,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,float16,0,0.008709333216150602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,float16,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,fp8,0,0.009103999783595404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,0,0.010847999403874079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,0,0.012469333906968435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,0,0.016965333372354507
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,0,0.014778666198253632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,0,0.0273333340883255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,0,0.02309333284695943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,float16,0,0.027050666511058807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,fp8,0,0.008592000231146812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,float16,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,0,0.0069973332186539965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,0,0.011402666568756104
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,0,0.010805333654085795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,0,0.01685333376129468
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,0,0.008709333216150602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,float16,0,0.01676799977819125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,0,0.007151999821265538
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,0,0.007914666707317034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,0,0.006911999856432279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,0,0.009232000137368837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,0,0.008821333448092142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,0,0.011087999989589056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,0,0.010591999938090643
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,float16,0,0.012453333785136541
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,fp8,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,0,0.0069759997228781385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,0,0.008447999755541483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,0,0.006650666395823161
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,0,0.006698666761318843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,fp8,0,0.02314666658639908
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,0,0.006853333363930385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,fp8,0,0.0085333331177632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,0,0.007045333584149678
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,0,0.006831999868154526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,0,0.006821333120266597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,0,0.00720000018676122
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,0,0.006538666784763336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,0,0.007157333195209503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,0,0.007173333317041397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,float16,0,0.0086666668454806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,fp8,0,0.00696000022192796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,0,0.006826666494210561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,fp8,0,0.014783999572197596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,0,0.006954666847983996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,0,0.00702400008837382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,0,0.006634666894872983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,0,0.006853333363930385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,0,0.0069226666043202085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,0,0.0069866664707660675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,0,0.006853333363930385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,float16,0,0.006773333375652631
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,0,0.006800000245372455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,0,0.006741333131988843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,fp8,0,0.006677333265542984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,0,0.006768000001708667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,0,0.006826666494210561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,0,0.006570666407545407
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,0,0.006842666616042455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,0,0.0069226666043202085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,fp8,0,0.006981333096822103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,float16,0,0.006735999758044879
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,0,0.006735999758044879
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,0,0.007007999966541926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,0,0.006602666651209195
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,0,0.006981333096822103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,0,0.00690133310854435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,float16,0,0.006629333520929019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,0,0.0069333333522081375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,float16,0,0.008821333448092142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,0,0.0068693334857622785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,0,0.006965333595871925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,0,0.006853333363930385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,0,0.006864000111818314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,0,0.0069226666043202085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,float16,0,4.21668815612793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,0,0.0069973332186539965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,fp8,0,3.8754186630249023
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,0,0.006735999758044879
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,0,0.0069226666043202085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,float16,0,5.077221234639485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,fp8,0,0.0069440001000960665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,float16,0,2.164639949798584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,float16,0,2.170618693033854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,fp8,0,3.9069652557373047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,fp8,0,2.5536425908406577
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,float16,0,4.62338129679362
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,float16,0,2.3467040061950684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,fp8,0,2.1220906575520835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,float16,0,2.1672266324361167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,float16,0,1.2041280269622803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,float16,0,1.364554723103841
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,fp8,0,1.056010643641154
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,float16,0,1.3834293683369954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,fp8,0,1.1335679690043132
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,float16,0,1.345834732055664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,fp8,0,1.1259466807047527
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,float16,0,0.6420906782150269
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,fp8,0,0.5883626540501913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,float16,0,0.7395359675089518
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,fp8,0,1.999791940053304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,float16,0,0.7369653383890787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,fp8,0,3.887392044067383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,fp8,0,0.5866719881693522
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,fp8,0,2.1129600207010903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,float16,0,2.7387412389119468
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,fp8,0,1.1529653072357178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,fp8,0,2.40611203511556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,float16,0,2.442080020904541
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,fp8,0,1.1793813705444336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,fp8,0,2.395967960357666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,float16,0,1.4540799458821614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,fp8,0,2.4092052777608237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,float16,0,3.0528427759806314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,float16,0,0.7445493539174398
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,fp8,0,1.2478453318277996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,float16,0,1.4345067342122395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,float16,0,1.396000067392985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,fp8,0,1.2498026688893635
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,float16,0,1.4945066769917805
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,fp8,0,1.2541866302490234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,fp8,0,0.6371786594390869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,float16,0,0.7743306954701742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,float16,0,0.8072853088378906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,fp8,0,0.6737226645151774
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,float16,0,0.6910400390625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,fp8,0,0.6781653563181559
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,float16,0,0.44223467508951825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,fp8,0,0.38785600662231445
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,float16,0,0.40746132532755536
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,float16,0,0.4344319899876912
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,fp8,0,0.38845332463582355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,float16,0,0.4325173298517863
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,fp8,0,0.3843093315760295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,fp8,0,0.6239680051803589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,float16,0,1.74399995803833
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,fp8,0,0.5854293505350748
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,fp8,0,1.7367946306864421
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,float16,0,2.0251572926839194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,fp8,0,1.7180800437927246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,float16,0,0.7177759806315104
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,float16,0,0.914959987004598
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,fp8,0,1.6088693936665852
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,fp8,0,0.900170644124349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,float16,0,1.0832106272379558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,fp8,0,0.8747680187225342
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,float16,0,1.0791412989298503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,fp8,0,0.3874239921569824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,float16,0,0.9129599730173746
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,fp8,0,0.873194694519043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,float16,0,0.5021973450978597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,fp8,0,0.4937973419825236
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,float16,0,0.5769973198572794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,fp8,0,0.4898773431777954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,float16,0,0.5385066668192545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,fp8,0,0.49291733900705975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,float16,0,0.5009013414382935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,fp8,0,0.4603573481241862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,fp8,0,0.2701386610666911
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,float16,0,0.2966880003611247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,fp8,0,0.2701173424720764
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,float16,0,0.2909333308537801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,float16,0,0.2918826738993327
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,fp8,0,0.28992533683776855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,fp8,0,0.26868800322214764
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,fp8,0,0.6729493141174316
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,float16,0,1.739829381306966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,fp8,0,2.0806454022725425
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,float16,0,2.239258607228597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,fp8,0,1.0257226626078289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,fp8,0,2.2237866719563804
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,float16,0,2.421818733215332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,float16,0,2.241589387257894
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,float16,0,1.1585226853688557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,float16,0,1.1600693066914876
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,fp8,0,1.1448960304260254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,float16,0,1.4067254066467285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,fp8,0,2.220677375793457
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,fp8,0,1.1437973181406658
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,float16,0,0.2978293299674988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,float16,0,0.7142986456553141
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,fp8,0,0.6362026532491049
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,float16,0,0.6154986619949341
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,fp8,0,0.6060266494750977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,float16,0,0.6968639691670736
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,fp8,0,0.5994826555252075
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,float16,0,0.3458186785380046
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,fp8,0,0.34093864758809406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,float16,0,0.3652533292770386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,fp8,0,0.34249067306518555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,float16,0,0.37193067868550617
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,fp8,0,0.3332479993502299
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,float16,0,0.387722651163737
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,fp8,0,0.31858134269714355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,float16,0,0.2080906629562378
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,float16,0,0.22769065697987875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,fp8,0,0.2026240030924479
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,fp8,0,0.20500266551971436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,float16,0,0.22714134057362875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,fp8,0,0.20405866702397665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,float16,0,0.20387200514475504
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,float16,0,1.155722697575887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,fp8,0,0.6060746510823568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,float16,0,1.4141173362731934
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,fp8,0,1.2960320313771565
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,fp8,0,1.2325812975565593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,float16,0,1.480149269104004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,fp8,0,1.28876797358195
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,float16,0,0.6956586837768555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,float16,0,0.6905492941538492
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,fp8,0,0.6855200131734213
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,float16,0,0.6907680034637451
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,fp8,0,1.1450080076853435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,fp8,0,0.685914675394694
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,fp8,0,0.2021226684252421
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,float16,0,0.6914506753285726
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,fp8,0,0.686346689860026
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,fp8,0,1.147594690322876
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,fp8,0,0.37274134159088135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,float16,0,0.4287253220876058
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,float16,0,0.4025813341140747
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,fp8,0,0.37094934781392414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,float16,0,0.3758026758829753
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,fp8,0,0.3717706600824992
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,float16,0,0.22838934262593588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,float16,0,0.23944000403086343
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,fp8,0,0.20110932985941568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,fp8,0,0.2097919980684916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,float16,0,0.23841599623362222
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,fp8,0,0.21599467595418295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,float16,0,0.2344693342844645
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,fp8,0,0.19978666305541992
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,fp8,0,0.6446613470713297
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,float16,0,0.13819199800491333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,float16,0,0.14995200435320535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,fp8,0,0.13609066605567932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,fp8,0,0.13666666547457376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,fp8,0,0.12614933649698892
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,float16,0,0.1502293348312378
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,float16,0,0.61736532052358
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,float16,0,1.3779679934183757
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,fp8,0,1.2360533078511555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,float16,0,0.37774932384490967
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,float16,0,1.5673227310180664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,float16,0,1.5115359624226887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,fp8,0,1.2545066674550374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,float16,0,1.3581013679504395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,fp8,0,1.2540533542633057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,fp8,0,0.6095679998397827
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,float16,0,0.6867146492004395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,float16,0,0.13778133193651834
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,fp8,0,0.6468480030695597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,float16,0,0.6521439949671427
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,fp8,0,0.6360106468200684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,float16,0,0.6501919825871786
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,fp8,0,0.6470239957173666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,fp8,0,0.3256426652272542
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,float16,0,0.39432533582051593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,fp8,0,0.3481866518656413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,float16,0,0.39555732409159344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,fp8,0,0.34613335132598877
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,float16,0,0.34650135040283203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,fp8,0,0.18160533905029297
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,float16,0,0.21905599037806192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,fp8,0,0.19115199645360312
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,float16,0,0.2171786626180013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,fp8,0,0.13645866513252258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,fp8,0,0.19221333662668863
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,float16,0,0.1949440042177836
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,fp8,0,0.1946559945742289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,float16,0,0.3514133294423421
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,float16,0,0.13132266203562418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,float16,0,0.12771200140317282
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,fp8,0,0.3454879919687907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,fp8,0,0.11518933375676473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,float16,0,0.12982933719952902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,fp8,0,0.3450666666030884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,float16,0,0.07832000156243642
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,fp8,0,0.07705600063006084
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,float16,0,0.1981333295504252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,fp8,0,0.0747680018345515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,float16,0,0.08493866523106892
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,float16,0,0.08516800403594971
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,fp8,0,0.07696533203125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,float16,0,0.08492799599965413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,float16,0,0.6576000054677328
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,float16,0,0.8025813102722168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,fp8,0,0.7646719614664713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,float16,0,0.9068373044331869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,fp8,0,0.7514026959737142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,float16,0,0.890613317489624
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,fp8,0,0.765455961227417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,fp8,0,0.11720533172289531
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,float16,0,0.4076266686121623
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,float16,0,0.46114134788513184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,fp8,0,0.11553066968917847
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,fp8,0,0.37509334087371826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,float16,0,0.4595946470896403
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,fp8,0,0.3993813196818034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,float16,0,0.45318933327992755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,fp8,0,0.3760426839192708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,float16,0,0.24562132358551025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,fp8,0,0.2178186575571696
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,fp8,0,0.2179093360900879
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,fp8,0,0.07654400169849396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,float16,0,0.24139734109242758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,fp8,0,0.2185386617978414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,fp8,0,0.2182719906171163
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,float16,0,0.13647466897964478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,float16,0,0.12896000345547995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,fp8,0,0.1251253286997477
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,float16,0,0.14171733458836874
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,fp8,0,0.12346133589744568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,float16,0,0.11954666177431743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,float16,0,0.14105066657066345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,fp8,0,0.12365866700808208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,fp8,0,0.07956266899903615
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,float16,0,0.08057599763075511
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,fp8,0,0.08064533273379008
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,fp8,0,0.40059733390808105
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,float16,0,0.08205866813659668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,fp8,0,0.07971733311812083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,fp8,0,0.0745119998852412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,float16,0,0.047685335079828896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,fp8,0,0.04552533229192098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,float16,0,0.05342400074005127
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,float16,0,0.22298133373260498
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,float16,0,0.052069331208864846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,fp8,0,0.04826133449872335
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,float16,0,0.21913599967956543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,fp8,0,0.04774933556715647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,fp8,0,0.12533866365750632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,float16,0,0.7649172941843668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,fp8,0,0.7639093399047852
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,float16,0,0.8071413040161133
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,float16,0,0.08672533432642619
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,fp8,0,0.7642186482747396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,float16,0,0.7690773010253906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,float16,0,0.08889599641164143
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,float16,0,0.40447998046875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,fp8,0,0.7624693711598715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,float16,0,0.3962719837824504
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,fp8,0,0.04582933088143667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,fp8,0,0.37857600053151447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,float16,0,0.4459520181020101
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,fp8,0,0.3957279920578003
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,float16,0,0.3982079823811849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,float16,0,0.052058666944503784
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,fp8,0,0.3970079819361369
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,float16,0,0.2126506765683492
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,fp8,0,0.21265600124994913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,fp8,0,0.2127679983774821
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,float16,0,0.2151093284289042
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,float16,0,0.23392534255981445
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,fp8,0,0.19906665881474814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,float16,0,0.23664534091949463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,fp8,0,0.20165334145228067
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,float16,0,0.12062933047612508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,float16,0,0.1227946678797404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,fp8,0,0.11339199542999268
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,fp8,0,0.11924800276756287
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,float16,0,0.12818666299184164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,float16,0,0.11957333485285442
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,fp8,0,0.11028266946474712
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,float16,0,0.07196799914042155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,fp8,0,0.0714026689529419
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,float16,0,0.07146133482456207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,fp8,0,0.07075733443101247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,float16,0,0.07253866891066234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,fp8,0,0.0662613312403361
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,fp8,0,0.3956480026245117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,float16,0,0.049312000473340355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,fp8,0,0.04554133117198944
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,float16,0,0.052986666560173035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,fp8,0,0.04826666911443075
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,fp8,0,0.04781866570313772
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,float16,0,0.048613334695498146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,fp8,0,0.04816000163555145
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,float16,0,0.0356480007370313
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,float16,0,0.0373333344856898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,fp8,0,0.03332799921433131
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,fp8,0,0.033530667424201965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,float16,0,0.036618667344252266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,fp8,0,0.035205334424972534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,float16,0,0.03557866563399633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,fp8,0,0.03321066747109095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,fp8,0,0.10905067125956218
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,fp8,0,0.10988266269365947
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,float16,0,0.0746506651242574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,float16,0,0.5119200150171915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,fp8,0,0.46244800090789795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,float16,0,0.5129333337148031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,fp8,0,0.46377066771189374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,float16,0,0.4901119867960612
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,fp8,0,0.4873280127843221
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,float16,0,0.256111999352773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,float16,0,0.2598453362782796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,fp8,0,0.24451732635498047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,fp8,0,0.2421226700146993
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,fp8,0,0.255237340927124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,fp8,0,0.25511467456817627
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,float16,0,0.15119999647140503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,fp8,0,0.13356799880663553
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,fp8,0,0.13110933701197305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,float16,0,0.149509330590566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,fp8,0,0.13994666934013367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,float16,0,0.1402133305867513
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,fp8,0,0.1395786702632904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,float16,0,0.08797867099444072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,float16,0,0.08310399949550629
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,fp8,0,0.07985066870848338
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,float16,0,0.0890933374563853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,fp8,0,0.07150933146476746
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,fp8,0,0.07924800117810567
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,fp8,0,0.08075733482837677
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,float16,0,0.09045333663622539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,float16,0,0.05072533090909322
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,float16,0,0.0551146666208903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,fp8,0,0.04996266464392344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,float16,0,0.05545066793759664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,fp8,0,0.04898133377234141
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,fp8,0,0.05062933266162872
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,float16,0,0.2834720015525818
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,fp8,0,0.04791999856630961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,float16,0,0.048997332652409874
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,float16,0,0.03155199935038885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,float16,0,0.03192000091075897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,fp8,0,0.029493334392706554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,fp8,0,0.029120000700155895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,float16,0,0.03124266614516576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,fp8,0,0.030917334059874218
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,float16,0,0.1413386662801107
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,fp8,0,0.028714666763941448
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,float16,0,0.03141333411137263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,fp8,0,0.02719466636578242
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,float16,0,0.02937600016593933
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,float16,0,0.030938667555650074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,fp8,0,0.02736533433198929
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,float16,0,0.029285334050655365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,fp8,0,0.02938133229811986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,fp8,0,0.0759093314409256
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,float16,0,0.51801598072052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,fp8,0,0.49853332837422687
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,float16,0,0.5425920089085897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,fp8,0,0.49826665719350177
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,float16,0,0.05580799778302511
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,float16,0,0.5218720038731893
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,fp8,0,0.5117599964141846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,float16,0,0.25707733631134033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,float16,0,0.2754879991213481
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,float16,0,0.03336533407370249
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,fp8,0,0.26046399275461835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,fp8,0,0.02935466667016347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,float16,0,0.27020267645517987
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,fp8,0,0.2714293400446574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,float16,0,0.269375999768575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,float16,0,0.14881599942843118
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,fp8,0,0.2573653260866801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,float16,0,0.2852906584739685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,fp8,0,0.2697920004526774
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,float16,0,0.14593066771825156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,fp8,0,0.14552533626556396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,fp8,0,0.14029332995414734
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,fp8,0,0.13779733578364053
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,float16,0,0.1566933294137319
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,float16,0,0.08371200164159139
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,float16,0,0.1557813286781311
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,fp8,0,0.08171733220418294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,float16,0,0.08201066652933757
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,float16,0,0.08333866794904073
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,fp8,0,0.08165866633256276
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,fp8,0,0.07899199922879536
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,fp8,0,0.13968533277511597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,fp8,0,0.08196799953778584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,float16,0,0.08686932921409607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,fp8,0,0.04720533390839895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,float16,0,0.049312000473340355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,float16,0,0.04990933338801066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,float16,0,0.051557332277297974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,fp8,0,0.046309332052866616
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,float16,0,0.05202133456865946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,fp8,0,0.04937066634496053
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,fp8,0,0.03356266766786575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,float16,0,0.034874667723973594
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,float16,0,0.033471999069054924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,fp8,0,0.031386665999889374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,fp8,0,0.03349866718053818
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,float16,0,0.03555200000603994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,fp8,0,0.03346666693687439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,float16,0,0.023381332556406658
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,fp8,0,0.023039999107519787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,float16,0,0.023290666441122692
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,fp8,0,0.023269332945346832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,float16,0,0.023258666197458904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,fp8,0,0.02306666721900304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,float16,0,0.02334933231274287
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,fp8,0,0.0232640008131663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,fp8,0,0.02279466638962428
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,float16,0,0.022970666488011677
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,float16,0,0.023210667073726654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,fp8,0,0.022757334013779957
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,float16,0,0.023941333095232647
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,fp8,0,0.022853332261244457
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,float16,0,0.3940426508585612
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,fp8,0,0.39354666074117023
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,float16,0,0.3951893250147502
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,fp8,0,0.3872906764348348
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,fp8,0,0.046944002310434975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,float16,0,0.033301333586374916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,fp8,0,0.02325333406527837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,float16,0,0.21517866849899292
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,float16,0,0.21071465810139975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,float16,0,0.3962080081303914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,fp8,0,0.3927573362986247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,fp8,0,0.20222399632136026
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,float16,0,0.2127359906832377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,fp8,0,0.1993173360824585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,float16,0,0.20724799235661825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,fp8,0,0.10784000158309937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,float16,0,0.11898133158683777
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,fp8,0,0.1072746713956197
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,fp8,0,0.20383999745051065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,fp8,0,0.1109279990196228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,float16,0,0.1188159982363383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,float16,0,0.11421333750089009
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,float16,0,0.11642133196194966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,fp8,0,0.10714667042096455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,float16,0,0.06791466474533081
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,fp8,0,0.06238933404286703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,fp8,0,0.061290666460990906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,float16,0,0.06765333314736684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,fp8,0,0.06030400097370148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,fp8,0,0.06170133252938589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,float16,0,0.03929600119590759
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,float16,0,0.025093334416548412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,fp8,0,0.037605332831541695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,fp8,0,0.03568533311287562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,fp8,0,0.2045066754023234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,fp8,0,0.037087999284267426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,float16,0,0.03832533210515976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,float16,0,0.0397119993964831
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,fp8,0,0.035349334279696144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,float16,0,0.027109332382678986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,fp8,0,0.02606400102376938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,float16,0,0.02718399961789449
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,float16,0,0.027322667340437572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,fp8,0,0.02518933266401291
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,fp8,0,0.026015999416510265
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,float16,0,0.027077332139015198
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,fp8,0,0.025013332565625507
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,float16,0,0.018906666586796444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,fp8,0,0.018218666315078735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,float16,0,0.018986667195955913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,fp8,0,0.01811733345190684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,float16,0,0.01887999971707662
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,fp8,0,0.01793066660563151
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,float16,0,0.017258666455745697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,float16,0,0.017162666966517765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,fp8,0,0.01695999999841054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,float16,0,0.017114666601022083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,float16,0,0.06629333396752675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,fp8,0,0.017237332959969837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,float16,0,0.03815466662247976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,fp8,0,0.016976000120242436
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,float16,0,0.016970666746298473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,float16,0,0.017152000218629837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,float16,0,0.017136000096797943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,fp8,0,0.016832000265518825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,float16,0,0.01706133286158244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,fp8,0,0.01708799973130226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,fp8,0,0.016965333372354507
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,float16,0,0.018944000204404194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,fp8,0,0.017231999586025875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,float16,0,0.16881599028905234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,fp8,0,0.16639999548594156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,float16,0,0.06488533318042755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,float16,0,0.01717866708834966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,float16,0,0.16966933012008667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,fp8,0,0.09085333347320557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,fp8,0,0.16930667559305826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,fp8,0,0.09251733620961507
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,float16,0,0.09241599837938945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,float16,0,0.09778666496276855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,fp8,0,0.09258133172988892
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,float16,0,0.05606933434804281
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,float16,0,0.09425600369771321
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,fp8,0,0.05333866675694784
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,fp8,0,0.09089600046475728
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,float16,0,0.05638400216897329
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,fp8,0,0.05296533306439718
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,fp8,0,0.0531626691420873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,float16,0,0.05499733487764994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,fp8,0,0.05269333223501841
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,float16,0,0.16875199476877847
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,fp8,0,0.16877333323160806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,fp8,0,0.031178665657838184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,float16,0,0.031146667897701263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,float16,0,0.032314665615558624
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,fp8,0,0.029520000020662945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,float16,0,0.032746667663256325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,fp8,0,0.03071466585000356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,float16,0,0.09470400214195251
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,float16,0,0.023077333966890972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,fp8,0,0.023034666975339253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,fp8,0,0.022970666488011677
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,fp8,0,0.02317333221435547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,float16,0,0.023029332359631855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,float16,0,0.02333866556485494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,float16,0,0.015194666882356008
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,fp8,0,0.014831999937693277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,float16,0,0.015114666273196539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,float16,0,0.015050667027632395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,fp8,0,0.014959999670584997
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,float16,0,0.015130666395028433
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,float16,0,0.057029331723848976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,float16,0,0.014874666929244995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,float16,0,0.014746667196353277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,fp8,0,0.01482133318980535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,float16,0,0.03356266766786575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,fp8,0,0.030261332790056866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,fp8,0,0.014874666929244995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,float16,0,0.014906667172908783
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,fp8,0,0.014848000059525171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,float16,0,0.014725333700577417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,float16,0,0.014938666174809137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,float16,0,0.014848000059525171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,float16,0,0.02293333411216736
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,float16,0,0.01470400020480156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,fp8,0,0.014815999815861383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,fp8,0,0.014901333798964819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,fp8,0,0.023007998863856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,float16,0,0.014853333433469137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,fp8,0,0.014789332946141561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,float16,0,0.014858666807413101
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,fp8,0,0.014864000181357065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,float16,0,0.014864000181357065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,fp8,0,0.01524266724785169
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,float16,0,0.014522666732470194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,fp8,0,0.014890667051076889
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,fp8,0,0.014837333311637243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,float16,0,0.10752532879511516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,fp8,0,0.10595200459162395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,float16,0,0.10718400279680888
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,fp8,0,0.015370666980743408
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,fp8,0,0.014661333213249842
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,fp8,0,0.014736000448465347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,fp8,0,0.10514133175214131
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,float16,0,0.014815999815861383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,float16,0,0.1092746655146281
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,fp8,0,0.10481599966684978
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,float16,0,0.06205333272616068
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,fp8,0,0.05816000203291575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,fp8,0,0.058149332801500954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,float16,0,0.059877331058184304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,fp8,0,0.058101331194241844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,fp8,0,0.057818666100502014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,fp8,0,0.0341386670867602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,float16,0,0.03522133330504099
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,float16,0,0.037317333122094475
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,fp8,0,0.03352533280849457
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,float16,0,0.03625600039958954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,fp8,0,0.03494933247566223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,float16,0,0.0352906659245491
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,fp8,0,0.03335466732581457
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,float16,0,0.02162666618824005
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,fp8,0,0.02128000060717265
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,float16,0,0.021157334248224895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,float16,0,0.021397332350413006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,fp8,0,0.021183999876181286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,float16,0,0.016794666647911072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,float16,0,0.016938666502634685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,fp8,0,0.01695466662446658
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,float16,0,0.016805333395799
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,fp8,0,0.01617066686352094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,float16,0,0.016634666671355564
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,fp8,0,0.015840000162522
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,float16,0,0.06233066817124685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,float16,0,0.011055999745925268
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,float16,0,0.01101333275437355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,float16,0,0.01108266661564509
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,fp8,0,0.010874666273593903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,float16,0,0.010863999525705973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,float16,0,0.06164266665776571
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,fp8,0,0.01126933346192042
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,float16,0,0.011061333119869232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,fp8,0,0.010816000401973724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,float16,0,0.011018666128317514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,float16,0,0.011941333611806234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,float16,0,0.01091733326514562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,fp8,0,0.020992000897725422
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,float16,0,0.010837333897749582
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,fp8,0,0.020981334149837494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,fp8,0,0.01073066641887029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,float16,0,0.010608000059922537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,float16,0,0.010858666151762009
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,fp8,0,0.01073066641887029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,float16,0,0.01073066641887029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,fp8,0,0.01089599976936976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,fp8,0,0.016202667107184727
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,float16,0,0.010608000059922537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,float16,0,0.010703999549150467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,fp8,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,float16,0,0.010863999525705973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,fp8,0,0.08249066770076752
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,fp8,0,0.08430932958920796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,fp8,0,0.0107893335322539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,float16,0,0.0853706697622935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,fp8,0,0.08266133566697438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,fp8,0,0.010832000523805618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,float16,0,0.04804266492525736
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,float16,0,0.04753600060939789
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,fp8,0,0.04580800235271454
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,fp8,0,0.010890666395425797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,float16,0,0.047322665651639305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,fp8,0,0.04574400186538696
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,float16,0,0.04572266836961111
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,fp8,0,0.04584000011285146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,fp8,0,0.027263998985290527
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,float16,0,0.08668800195058186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,float16,0,0.027994667490323383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,fp8,0,0.027471999327341717
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,float16,0,0.029264000554879505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,fp8,0,0.0278613343834877
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,float16,0,0.027829334139823914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,fp8,0,0.027471999327341717
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,float16,0,0.01803733284274737
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,float16,0,0.021935999393463135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,float16,0,0.018266666680574417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,float16,0,0.01788266624013583
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,float16,0,0.018911999960740406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,float16,0,0.014725333700577417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,fp8,0,0.013365333278973898
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,float16,0,0.013306666165590286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,fp8,0,0.01381333296497663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,float16,0,0.013050666699806849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,fp8,0,0.01349866638580958
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,float16,0,0.012997332960367203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,float16,0,0.01118933285276095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,float16,0,0.08734933535257976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,fp8,0,0.010666667173306147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,float16,0,0.01098666712641716
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,fp8,0,0.010591999938090643
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,float16,0,0.010805333654085795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,fp8,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,float16,0,0.010933333386977514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,fp8,0,0.009413333609700203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,fp8,0,0.010816000401973724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,fp8,0,0.016901332885026932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,fp8,0,0.01716800034046173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,float16,0,0.010666667173306147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,fp8,0,0.010874666273593903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,fp8,0,0.0102613332370917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,fp8,0,0.009162666896979014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,float16,0,0.009082666908701261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,float16,0,0.009114666531483332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,fp8,0,0.009808000177145004
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,float16,0,0.009594666461149851
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,fp8,0,0.0458186666170756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,float16,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,float16,0,0.00878399983048439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,float16,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,float16,0,0.029285334050655365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,float16,0,0.0758186678091685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,fp8,0,0.07227733234564464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,fp8,0,0.07218666871388753
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,float16,0,0.01081066702802976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,float16,0,0.04147200038035711
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,float16,0,0.07589866717656453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,fp8,0,0.07226133346557617
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,float16,0,0.0102186668664217
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,float16,0,0.043840001026789345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,float16,0,0.008901333436369896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,fp8,0,0.04142399877309799
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,float16,0,0.043552001317342125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,fp8,0,0.04014399896065394
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,fp8,0,0.040362666050593056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,float16,0,0.041573333243529
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,float16,0,0.025397333006064098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,float16,0,0.027072000006834667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,fp8,0,0.025146665672461193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,float16,0,0.02566933383544286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,fp8,0,0.025034666061401367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,fp8,0,0.025205334027608235
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,float16,0,0.026074667771657307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,fp8,0,0.025397333006064098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,fp8,0,0.0164533331990242
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,float16,0,0.01681600014368693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,fp8,0,0.016544000556071598
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,float16,0,0.017162666966517765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,float16,0,0.07481599847475688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,float16,0,0.013045333325862885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,float16,0,0.012757333616415659
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,float16,0,0.013050666699806849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,fp8,0,0.012608000387748083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,float16,0,0.010645333677530289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,float16,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,fp8,0,0.039333333571751915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,fp8,0,0.008746666833758354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,float16,0,0.010778666784365972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,fp8,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,float16,0,0.008799999952316284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,float16,0,0.016965333372354507
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,float16,0,0.016842667013406754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,float16,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,float16,0,0.008842666943868002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,fp8,0,0.009445333232482275
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,float16,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,fp8,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,float16,0,0.008645333349704742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,fp8,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,float16,0,0.012997332960367203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,fp8,0,0.009695999945203463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,float16,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,float16,0,0.07445866862932841
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,fp8,0,0.06838933130105336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,float16,0,0.0726560006539027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,fp8,0,0.016106666376193363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,fp8,0,0.0681386689345042
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,float16,0,0.07314666608969371
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,fp8,0,0.06657066444555919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,float16,0,0.041759997606277466
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,float16,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,fp8,0,0.039434666434923805
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,float16,0,0.04277333120505015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,float16,0,0.043562665581703186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,fp8,0,0.03938666731119156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,float16,0,0.041722665230433144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,fp8,0,0.03868266691764196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,float16,0,0.025040000677108765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,float16,0,0.025306666890780132
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,fp8,0,0.023242667317390442
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,fp8,0,0.02325333406527837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,float16,0,0.02514133354028066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,fp8,0,0.023925334215164185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,float16,0,0.025285333395004272
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,fp8,0,0.0234400009115537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,float16,0,0.01681600014368693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,float16,0,0.016672000288963318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,float16,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,float16,0,0.016927999754746754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,float16,0,0.01267733300725619
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,fp8,0,0.012693333129088083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,float16,0,0.012794667234023413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,float16,0,0.012869333227475485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,float16,0,0.009450666606426239
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,fp8,0,0.038912000755469
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,float16,0,0.00966933307548364
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,fp8,0,0.009216000015536943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,float16,0,0.00916800027092298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,float16,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,fp8,0,0.008767999708652496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,fp8,0,0.00879466657837232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,float16,0,0.016800000021855038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,float16,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,float16,0,0.010346666599313417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,float16,0,0.01293333371480306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,float16,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,fp8,0,0.008725333337982496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,float16,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,float16,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,float16,0,0.00884799969693025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,float16,0,0.008645333349704742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,float16,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,0,0.010698666175206503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,0,0.008858666444818178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,0,0.01190399999419848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,float16,0,0.008853333070874214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,0,0.010602666685978571
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,0,0.017242666333913803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,0,0.014741333822409311
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,float16,0,0.00921066664159298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,fp8,0,0.018826667219400406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,float16,0,0.02142400046189626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,0,0.007125333572427432
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,0,0.01101333275437355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,0,0.007167999943097432
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,0,0.00860799973209699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,fp8,0,0.00878399983048439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,0,0.0069440001000960665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,0,0.009141333401203156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,float16,0,0.010602666685978571
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,0,0.006906666482488315
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,0,0.006895999734600385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,0,0.006853333363930385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,0,0.007071999832987785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,0,0.0069386667261521024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,0,0.008629333227872849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,0,0.0068693334857622785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,0,0.006981333096822103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,float16,0,0.008869333192706108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,0,0.006640000268816948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,0,0.0068853336075941724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,0,0.00690133310854435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,0,0.006618666773041089
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,float16,0,0.0068693334857622785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,0,0.007002666592597961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,0,0.0068853336075941724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,0,0.006634666894872983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,0,0.006858666737874349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,0,0.006981333096822103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,0,0.008613333106040955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,fp8,0,0.012639999389648438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,float16,0,0.014746667196353277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,fp8,0,0.006613333399097125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,0,0.0069333333522081375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,0,0.0069333333522081375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,0,0.006618666773041089
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,0,0.006778666749596596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,0,0.006645333642760913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,fp8,0,0.006645333642760913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,float16,0,0.007098666702707608
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,0,0.006895999734600385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,0,0.006730666384100914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,fp8,0,0.008832000195980072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,0,0.006634666894872983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,0,0.006602666651209195
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,0,0.006688000013430913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,0,0.006533333410819371
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,0,0.006831999868154526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,float16,0,0.006864000111818314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,0,0.006757333253820737
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,0,0.006629333520929019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,fp8,0,0.00696000022192796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,0,0.007322666545708974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,0,0.006768000001708667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,0,0.006751999879876773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,float16,0,0.006927999978264173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,fp8,0,2.681488037109375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,0,0.007040000210205714
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,fp8,0,2.583183924357096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,float16,0,2.815749486287435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,float16,0,1.5627892812093098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,float16,0,1.5562559763590496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,fp8,0,2.729541460673014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,fp8,0,1.4380319913228352
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,fp8,0,0.006927999978264173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,float16,0,1.4772639274597168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,fp8,0,1.4142667452494304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,float16,0,1.4770347277323406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,fp8,0,1.4386293093363445
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,0.7799413204193115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,float16,0,1.4732960065205891
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,1.3518239657084148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,float16,0,0.8520159721374512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,fp8,0,0.777674674987793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,float16,0,2.8093226750691733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,float16,0,0.9194666544596354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,float16,0,2.810352007548014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,float16,0,0.8108959992726644
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,fp8,0,0.7794720331827799
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,float16,0,0.4962986707687378
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,0.7806453704833984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,float16,0,0.8020533720652262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,0.45417598883310956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,float16,0,0.5186026493708292
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,1.3518346150716145
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,fp8,0,0.4424053430557251
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,float16,0,0.5139840046564738
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,fp8,0,0.45161600907643634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,float16,0,0.4710719982783
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,float16,0,0.46832001209259033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,0.45053335030873615
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,fp8,0,1.600602626800537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,float16,0,1.9581759770711262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,float16,0,1.8462986946105957
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,fp8,0,1.5027146339416504
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,float16,0,0.8025333086649576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,float16,0,1.817893346150716
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,float16,0,0.8697013060251871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,0.8519893487294515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,float16,0,0.9245333671569824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,fp8,0,1.5019520123799641
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,fp8,0,0.7967840035756429
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,float16,0,1.0430080095926921
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,fp8,0,0.8506453037261963
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,fp8,0,0.7970399856567383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,float16,0,1.0221013228098552
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,0.7990986506144205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,float16,0,0.8694453239440918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,float16,0,0.5345493157704672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,float16,0,0.5724320014317831
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,0.4464213450749715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,fp8,0,0.4426933526992798
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,fp8,0,0.47258134682973224
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,fp8,0,0.42601064840952557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,float16,0,0.5558079878489176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,float16,0,0.48765333493550617
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,0.44361599286397296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,0.28326932589213055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,float16,0,0.29360532760620117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,float16,0,0.32103466987609863
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,fp8,0,0.2632853388786316
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,float16,0,0.3207040031750997
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,fp8,0,0.28436799844106037
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,float16,0,0.2894986669222514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,float16,0,0.31545599301656085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,0.26428266366322833
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,float16,0,1.1679413318634033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,fp8,0,0.7329866886138916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,fp8,0,1.1416107018788655
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,float16,0,1.2634560267130535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,fp8,0,1.1468160152435303
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,float16,0,1.169045368830363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,0.6126506725947062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,float16,0,0.6296533346176147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,fp8,0,1.0732053120930989
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,float16,0,0.6687466303507487
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,float16,0,0.5578720172246298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,fp8,0,0.6156320174535116
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,float16,0,0.6588799953460693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,fp8,0,0.5757866700490316
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,fp8,0,0.47202134132385254
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,float16,0,0.7100373109181722
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,fp8,0,0.5745919942855835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,float16,0,0.3593386809031169
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,0.32737600803375244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,float16,0,0.3593493302663167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,float16,0,0.3850880066553752
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,0.5763946771621704
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,fp8,0,0.3264480034510295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,float16,0,0.3826346794764201
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,fp8,0,0.34997332096099854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,0.3274773359298706
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,float16,0,0.35865068435668945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,fp8,0,0.26306132475535077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,0.21861867109934488
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,float16,0,0.2243893345197042
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,fp8,0,0.21919999519983926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,fp8,0,0.20378132661183676
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,float16,0,0.24116800228754678
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,float16,0,0.24289067586263022
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,fp8,0,0.21967999140421549
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,float16,0,0.2246453364690145
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,0.2029013236363729
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,float16,0,1.6439040501912434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,fp8,0,1.3652373949686687
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,fp8,0,1.3662293752034504
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,float16,0,1.8018239339192708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,float16,0,1.4862559636433919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,float16,0,0.7814986705780029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,0.7168959776560465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,fp8,0,1.4589653015136719
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,float16,0,0.9469226996103922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,fp8,0,0.7639946937561035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,float16,0,0.6291946570078532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,fp8,0,0.3496640125910441
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,fp8,0,0.7162720362345377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,fp8,0,0.7958026727040609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,float16,0,0.7804319858551025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,float16,0,0.4275999863942464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,0.7149173418680826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,0.3918773333231608
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,float16,0,0.24823466936747232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,fp8,0,0.4172266721725464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,float16,0,0.42524266242980957
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,fp8,0,0.4185173511505127
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,fp8,0,0.3901866674423218
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,0.390720009803772
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,float16,0,0.4268639882405599
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,0.24205867449442545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,float16,0,0.2691466609636943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,float16,0,0.2557706634203593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,fp8,0,0.24260266621907553
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,float16,0,0.26865599552790326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,fp8,0,0.2262186606725057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,float16,0,0.2512106696764628
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,0.22782933712005615
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,float16,0,0.16054399808247885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,0.14613333344459534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,float16,0,0.9372639656066895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,float16,0,0.17921066284179688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,fp8,0,0.15964800119400024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,float16,0,0.17815999190012613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,fp8,0,0.15820800264676413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,float16,0,0.4617973168690999
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,float16,0,0.15995200475056967
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,float16,0,0.1605280041694641
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,float16,0,0.42631999651590985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,float16,0,0.2512960036595662
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,float16,0,0.9502879778544108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,fp8,0,0.8094986279805502
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,fp8,0,0.2444960077603658
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,float16,0,0.8781332969665527
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,fp8,0,0.8638453483581543
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,float16,0,0.8812159697214762
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,fp8,0,0.808298667271932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,float16,0,0.4700053135553996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,float16,0,0.5560746590296427
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,fp8,0,0.4287573496500651
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,float16,0,0.5436053276062012
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,fp8,0,0.46031999588012695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,float16,0,0.46798932552337646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,fp8,0,0.45874667167663574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,fp8,0,0.15940266847610474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,0.4307519992192586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,float16,0,0.28780267635981244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,0.1458453337351481
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,0.25972266991933185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,float16,0,0.2997973362604777
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,fp8,0,0.260426660378774
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,float16,0,0.26185067494710285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,fp8,0,0.2605066696802775
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,float16,0,0.28411199649175006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,fp8,0,0.2598026593526204
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,float16,0,0.2656266689300537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,0.24247467517852783
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,float16,0,0.17321600516637167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,0.1441439986228943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,fp8,0,0.15749333302179971
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,float16,0,0.15784533818562826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,fp8,0,0.15548266967137656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,float16,0,0.15779733657836914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,fp8,0,0.15452266732851663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,float16,0,0.16004266341527304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,0.46004800001780194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,0.1035040020942688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,float16,0,0.10411733388900757
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,float16,0,0.11486400167147319
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,float16,0,0.11521066228548686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,fp8,0,0.10217600067456563
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,float16,0,0.8410879770914713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,float16,0,0.11377066373825073
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,fp8,0,0.10311999917030334
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,float16,0,0.10435199737548828
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,0.10326400399208069
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,float16,0,0.471834659576416
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,float16,0,0.8213600317637125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,fp8,0,0.7936639785766602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,float16,0,0.8210773468017578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,fp8,0,0.8002346356709799
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,float16,0,0.8254773616790771
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,0.3987413247426351
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,float16,0,0.46484800179799396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,float16,0,0.43830398718516034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,fp8,0,0.7573706309000651
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,fp8,0,0.4252320130666097
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,float16,0,0.4625120162963867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,float16,0,0.15998400251070657
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,fp8,0,0.39700265725453693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,float16,0,0.43275201320648193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,fp8,0,0.39792001247406006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,0.21934932470321655
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,0.4259573221206665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,0.15518933534622192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,float16,0,0.27165865898132324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,fp8,0,0.23345067103703818
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,float16,0,0.2540266712506612
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,fp8,0,0.10288533568382263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,fp8,0,0.2363413373629252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,float16,0,0.27005332708358765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,fp8,0,0.23484800259272257
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,float16,0,0.2416693369547526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,float16,0,0.14218133687973022
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,0.12745599945386252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,float16,0,0.15156267086664835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,fp8,0,0.1362826625506083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,float16,0,0.14035200079282126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,fp8,0,0.1359999974568685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,float16,0,0.14150933424631754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,0.12769599755605063
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.08861866593360901
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,float16,0,0.10111467043558757
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,fp8,0,0.09014399846394856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,fp8,0,0.08277333279450734
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,float16,0,0.09899200002352397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,fp8,0,0.0825973351796468
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,float16,0,0.24039467175801596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,float16,0,0.43796801567077637
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,float16,0,0.09090133508046468
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.08261866867542267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,float16,0,0.05787200232346853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.05596800148487091
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,float16,0,0.05801600217819214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,fp8,0,0.05735466877619425
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,float16,0,0.059578667084376015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,0.23004267613093057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,fp8,0,0.05225066840648651
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,float16,0,0.05779199798901876
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,fp8,0,0.05442666510740916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,float16,0,0.05821333328882853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.051882664362589516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,float16,0,0.09870933492978413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,float16,0,0.5279306570688883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,fp8,0,0.4590826829274495
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,float16,0,0.4997119903564453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,float16,0,0.10053333640098572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,float16,0,0.5348533391952515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,fp8,0,0.4603360096613566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,0.2472106615702311
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,float16,0,0.30083199342091876
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,fp8,0,0.26181334257125854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,float16,0,0.3022613326708476
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,fp8,0,0.26502400636672974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,float16,0,0.28682132562001544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,fp8,0,0.26426132520039874
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,float16,0,0.2714186708132426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,0.2619626720746358
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,fp8,0,0.13781866431236267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,float16,0,0.15426133076349893
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,0.14630400141080221
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,float16,0,0.16972267627716064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,fp8,0,0.14871467153231302
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,float16,0,0.17114132642745972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,fp8,0,0.138373335202535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,fp8,0,0.13847466309865317
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,float16,0,0.15371732910474142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,0.14629333217938742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,float16,0,0.09902933239936829
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,0.08239999910195668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,fp8,0,0.09037333726882935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,fp8,0,0.4787360032399495
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,float16,0,0.09187199672063191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,fp8,0,0.08958933750788371
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,float16,0,0.10206400354703267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,fp8,0,0.09047999978065491
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,float16,0,0.27212266127268475
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,float16,0,0.09276800354321797
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,0.08335999647776286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,float16,0,0.060533334811528526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,float16,0,0.06701866785685222
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,fp8,0,0.05983999868233999
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,float16,0,0.06635733445485432
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,fp8,0,0.05991999804973602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,float16,0,0.06053866446018219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,fp8,0,0.05991999804973602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,float16,0,0.06118933359781901
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.05942399799823761
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.04479999840259552
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,float16,0,0.04560000201066335
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,float16,0,0.048245335618654885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,fp8,0,0.04358399907747904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,float16,0,0.049269333481788635
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,fp8,0,0.04552533229192098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,fp8,0,0.04372266431649526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,float16,0,0.04544533292452494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,float16,0,0.15307199954986572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,float16,0,0.045696000258127846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.041482667128245033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,float16,0,0.5152533451716105
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,fp8,0,0.44919999440511066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,float16,0,0.49212801456451416
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,float16,0,0.578661322593689
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,fp8,0,0.4784746567408244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,float16,0,0.09356266260147095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,0.240064005057017
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,float16,0,0.26332799593607586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,fp8,0,0.474181334177653
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,float16,0,0.2932800054550171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,fp8,0,0.23644800980885824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,float16,0,0.25698665777842206
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.059861332178115845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,fp8,0,0.2521013418833415
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,float16,0,0.27824532985687256
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,fp8,0,0.2534133394559224
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,0.13359999656677246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,0.25676266352335614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,float16,0,0.1458133359750112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,float16,0,0.26360533634821576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,fp8,0,0.1389226714769999
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,fp8,0,0.13082666198412576
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,float16,0,0.14615999658902487
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,float16,0,0.08674666285514832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,0.08247466882069905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,float16,0,0.09129066268603007
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,fp8,0,0.08311999837557475
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,float16,0,0.15146666765213013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,fp8,0,0.08216533561547597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,float16,0,0.0841919978459676
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,fp8,0,0.08251733581225078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,float16,0,0.08675733208656311
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,float16,0,0.0545653353134791
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.04925866425037384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,float16,0,0.05393599967161814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,fp8,0,0.14064533511797586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,float16,0,0.15965867042541504
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,fp8,0,0.04911466439565023
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,float16,0,0.05955733358860016
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,fp8,0,0.049216002225875854
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,float16,0,0.15961066881815592
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,float16,0,0.05394133428732554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,float16,0,0.05434666574001312
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.04960533479849497
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,float16,0,0.03355200091997782
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.031184000273545582
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,float16,0,0.03745066622893015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,float16,0,0.03762666632731756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,fp8,0,0.0334346666932106
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,float16,0,0.03745600084463755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,fp8,0,0.033573334415753685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,0.08236266672611237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,float16,0,0.034117333590984344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.0314026673634847
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,float16,0,0.03315199911594391
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.032511999209721885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,float16,0,0.03519999980926514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,fp8,0,0.03323200096686681
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,float16,0,0.03566933423280716
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,fp8,0,0.03271999955177307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,float16,0,0.03523733218510946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,fp8,0,0.03324266771475474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,float16,0,0.15435199936230978
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.033301333586374916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,float16,0,0.3325226704279582
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,fp8,0,0.04993600149949392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,fp8,0,0.30052266518274945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,0.13286933302879333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,float16,0,0.33355732758839923
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,fp8,0,0.28887466589609784
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,float16,0,0.3120853304862976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,float16,0,0.17130666971206665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,fp8,0,0.28751999139785767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,float16,0,0.09193066755930583
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,float16,0,0.177839994430542
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,0.1646346648534139
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,float16,0,0.16595733165740967
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,float16,0,0.17899733781814575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,fp8,0,0.15409066279729208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,fp8,0,0.1643786629041036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,0.1638826628526052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,float16,0,0.09513066212336223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,0.09291733304659526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,fp8,0,0.08665600419044495
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,float16,0,0.10213333368301392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,fp8,0,0.08505599697430928
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,float16,0,0.10358400146166484
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,fp8,0,0.09316266576449077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,float16,0,0.09770133097966512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,float16,0,0.05983999868233999
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,0.08782933155695598
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.05271466573079427
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,fp8,0,0.05632533133029938
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,float16,0,0.0625493327776591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,fp8,0,0.05590933561325073
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,float16,0,0.0612960010766983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,fp8,0,0.05574933191140493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,float16,0,0.060165335734685264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,float16,0,0.037946666280428566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.03530666728814443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,float16,0,0.041850666205088295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,fp8,0,0.03515200068553289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,float16,0,0.041349334021409355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,fp8,0,0.034917332231998444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,float16,0,0.04140799989302953
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,fp8,0,0.035173334181308746
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,fp8,0,0.1530080040295919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,float16,0,0.03803733239571253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,float16,0,0.1705066760381063
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,float16,0,0.02739199995994568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.027072000006834667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,float16,0,0.029706666866938274
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,fp8,0,0.0271573339899381
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,float16,0,0.09758399923642476
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,float16,0,0.03009066730737686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,fp8,0,0.027061333258946735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,float16,0,0.02937600016593933
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,fp8,0,0.027888000011444092
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.027109332382678986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,float16,0,0.02743999908367793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,float16,0,0.027232001225153606
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,float16,0,0.03339733431736628
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,fp8,0,0.02720000098148982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,float16,0,0.05587733288606008
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,float16,0,0.029311999678611755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,fp8,0,0.02722666660944621
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,float16,0,0.029077333708604176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,fp8,0,0.025242666403452556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,float16,0,0.027109332382678986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.05555200080076853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,float16,0,0.3487039804458618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,fp8,0,0.29741867383321124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,float16,0,0.3349119822184245
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,fp8,0,0.29798932870229083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.03533333291610082
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,float16,0,0.3468480110168457
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,fp8,0,0.2984586755434672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,0.1604586640993754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,float16,0,0.17516799767812094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,float16,0,0.1699413259824117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.027424000203609467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,float16,0,0.17450666427612305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,fp8,0,0.16710400581359863
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,float16,0,0.171941339969635
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,fp8,0,0.157258669535319
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.025029333929220837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,fp8,0,0.16485333442687988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,0.08809600273768108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,float16,0,0.18102399508158365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,0.16525333126386008
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,float16,0,0.10140800476074219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,float16,0,0.09461333354314168
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,fp8,0,0.09283733367919922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,fp8,0,0.0902239978313446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,float16,0,0.09990400075912476
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,float16,0,0.058042665322621666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,0.08895466725031535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.052383999029795326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,fp8,0,0.05373866856098175
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,float16,0,0.06015466650327047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,fp8,0,0.052373334765434265
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,float16,0,0.06088533500830332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,fp8,0,0.052671998739242554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,float16,0,0.029125332832336426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,float16,0,0.058058664202690125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,float16,0,0.03532266616821289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.05522666871547699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.034186666210492454
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,fp8,0,0.033904001116752625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,float16,0,0.037962667644023895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,float16,0,0.03736533224582672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,fp8,0,0.035360001027584076
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,float16,0,0.03765333443880081
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,float16,0,0.10318400462468465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,float16,0,0.035642666121323906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.03489600121974945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,float16,0,0.02298133323589961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.022272000710169475
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,fp8,0,0.022602667411168415
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,float16,0,0.02292799949645996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,fp8,0,0.021290667355060577
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,float16,0,0.02332799881696701
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,fp8,0,0.020992000897725422
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,float16,0,0.023311999936898548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,float16,0,0.09724799791971843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,float16,0,0.020997333029905956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,float16,0,0.023082666099071503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,fp8,0,0.02091199904680252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,float16,0,0.023210667073726654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,float16,0,0.021029333273569744
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,float16,0,0.021013334393501282
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.020245333512624104
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,float16,0,0.020901332298914593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,fp8,0,0.03428266694148382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.01960533360640208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,float16,0,0.022255999346574146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,fp8,0,0.01959466685851415
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,float16,0,0.023365333676338196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,fp8,0,0.019146667172511418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,fp8,0,0.08674666285514832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.01921066641807556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.02252800017595291
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,float16,0,0.020975999534130096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.020917333662509918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,float16,0,0.0594400018453598
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,fp8,0,0.021007999777793884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,float16,0,0.2387040058771769
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,float16,0,0.021349333226680756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,float16,0,0.021914665897687275
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,fp8,0,0.22536534070968628
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,float16,0,0.2390399972597758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,float16,0,0.13064533472061157
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,fp8,0,0.2242400050163269
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,fp8,0,0.11655466755231221
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,float16,0,0.13353600104649863
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,float16,0,0.2419253389040629
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,fp8,0,0.22734934091567993
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,float16,0,0.1279253363609314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,0.12154133121172588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,float16,0,0.13103466232617697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,fp8,0,0.12071999907493591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.06551466882228851
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,float16,0,0.13115200400352478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,0.11769599715868632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,fp8,0,0.06769066552321117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,fp8,0,0.0661599983771642
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,float16,0,0.07474133372306824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,float16,0,0.07653866708278656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,float16,0,0.07258666555086772
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,float16,0,0.043738668163617454
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,float16,0,0.07652799785137177
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.06629333396752675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.03934400031963984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,fp8,0,0.06824533144632976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,fp8,0,0.11718400319417317
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,float16,0,0.04371733466784159
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,float16,0,0.04365866879622141
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,float16,0,0.04350399971008301
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,fp8,0,0.037658666570981346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,fp8,0,0.037658666570981346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.02404800057411194
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,float16,0,0.026133333643277485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,float16,0,0.026895999908447266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,fp8,0,0.024832000335057575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,float16,0,0.02743999908367793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,fp8,0,0.023130667706330616
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,float16,0,0.02720533311367035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,float16,0,0.07211199899514516
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,float16,0,0.027061333258946735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.02443733314673106
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,float16,0,0.016901332885026932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,float16,0,0.016949333250522614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,fp8,0,0.01553600033124288
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,float16,0,0.01700266698996226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,fp8,0,0.015184000134468079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,float16,0,0.01710933322707812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,float16,0,0.017136000096797943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,float16,0,0.015184000134468079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,float16,0,0.04558933277924856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,float16,0,0.01674666628241539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.03924266745646795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,fp8,0,0.015109332899252573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,float16,0,0.016901332885026932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,fp8,0,0.015114666273196539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,float16,0,0.01655999943614006
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,float16,0,0.015200000256299973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,fp8,0,0.02342933416366577
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.014885333677132925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,float16,0,0.015418666104475657
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,float16,0,0.015872000406185787
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,fp8,0,0.015018666783968607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,fp8,0,0.015103999525308609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,float16,0,0.015002666662136713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,fp8,0,0.014858666807413101
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.014831999937693277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,float16,0,0.014853333433469137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.015194666882356008
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,float16,0,0.015087999403476715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,float16,0,0.01515199989080429
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,fp8,0,0.01488000030318896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,float16,0,0.016069332758585613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,fp8,0,0.015072000523408255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,float16,0,0.014837333311637243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.01611199975013733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,fp8,0,0.039503999054431915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,float16,0,0.10565333565076192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,fp8,0,0.09629866480827332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,float16,0,0.10117333134015401
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,fp8,0,0.09533333778381348
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,float16,0,0.015072000523408255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,float16,0,0.05819733440876007
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,fp8,0,0.09497599800427754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.052517334620157875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,float16,0,0.05815466741720835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,float16,0,0.01505600040157636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,float16,0,0.05886933207511902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.015087999403476715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,fp8,0,0.051872000098228455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,fp8,0,0.05095999936262766
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,float16,0,0.05772800246874491
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.03146666785081228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,float16,0,0.03348266581694285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.05269866685072581
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,float16,0,0.03724266588687897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,fp8,0,0.031317333380381264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,float16,0,0.05788800120353699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,float16,0,0.03534399966398875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,fp8,0,0.031167998909950256
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,float16,0,0.03660800059636434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,float16,0,0.03686933219432831
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.03146666785081228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,float16,0,0.020917333662509918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.019226666539907455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,float16,0,0.10692800084749858
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,fp8,0,0.01899733394384384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,float16,0,0.021040000021457672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,fp8,0,0.019178666174411774
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,float16,0,0.021040000021457672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,float16,0,0.020917333662509918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,float16,0,0.014005333185195923
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,float16,0,0.021045332153638203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,float16,0,0.014159999787807465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,fp8,0,0.01926933353145917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,float16,0,0.014981333166360855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,float16,0,0.014368000129858652
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,float16,0,0.013674666484196981
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,float16,0,0.012768000364303589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,float16,0,0.012789333860079447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,float16,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,fp8,0,0.03142933299144109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,float16,0,0.012709333250919977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,float16,0,0.013104000439246496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,float16,0,0.012789333860079447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,float16,0,0.012666666259368261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,float16,0,0.012757333616415659
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.01886933296918869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,fp8,0,0.05320000151793162
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,float16,0,0.012671999633312225
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,float16,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,float16,0,0.012885333349307379
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,float16,0,0.012970666090647379
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,float16,0,0.012655999511480331
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,float16,0,0.012768000364303589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.012719999998807907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,float16,0,0.012613333761692047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,fp8,0,0.012634667257467905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,float16,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,float16,0,0.01267733300725619
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,float16,0,0.012709333250919977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,fp8,0,0.012703999876976013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,float16,0,0.06844266752401988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,float16,0,0.01259200026591619
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,fp8,0,0.06197333335876465
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,float16,0,0.06700799862543742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,fp8,0,0.063509335120519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.03356799980004629
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,float16,0,0.037658666570981346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,float16,0,0.03924266745646795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,float16,0,0.06673599779605865
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,fp8,0,0.06287999947865804
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,float16,0,0.03749333322048187
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,float16,0,0.01293333371480306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,float16,0,0.039333333571751915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,fp8,0,0.03346133232116699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,float16,0,0.023215999205907185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.033258666594823204
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.022597332795461018
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,fp8,0,0.02187199890613556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,float16,0,0.02329600105683009
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,float16,0,0.023413332800070446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,float16,0,0.02316266546646754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,fp8,0,0.022069332500298817
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,fp8,0,0.021594665944576263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,float16,0,0.023056000471115112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.014896000425020853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.022309333086013794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,float16,0,0.01505600040157636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,fp8,0,0.014741333822409311
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,float16,0,0.014938666174809137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,float16,0,0.015034666905800501
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,fp8,0,0.014896000425020853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,fp8,0,0.035301332672437034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.014837333311637243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,float16,0,0.010970667004585266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,float16,0,0.010874666273593903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,fp8,0,0.010768000036478043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,float16,0,0.01099733387430509
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,fp8,0,0.033946665624777474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,fp8,0,0.010784000158309937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,float16,0,0.03841600070397059
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,float16,0,0.01097600037852923
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,fp8,0,0.010842667271693548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,float16,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,float16,0,0.010970667004585266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.010698666175206503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,float16,0,0.01073066641887029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,fp8,0,0.010629333555698395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,float16,0,0.01080000028014183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,float16,0,0.010682666053374609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,float16,0,0.010629333555698395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,float16,0,0.010911999891201654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,float16,0,0.010853332777818045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,fp8,0,0.009061333412925402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,float16,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,float16,0,0.015066667149464289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,float16,0,0.010751999914646149
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,float16,0,0.010714666297038397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,float16,0,0.014922666052977243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.010826667149861654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,float16,0,0.008858666444818178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.009082666908701261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,float16,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,float16,0,0.010634666929642359
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,float16,0,0.010608000059922537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.010421333213647207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,fp8,0,0.008709333216150602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,float16,0,0.010826667149861654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,float16,0,0.009082666908701261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.009328000247478485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,float16,0,0.057802667220433555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,float16,0,0.05704000095526377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,float16,0,0.009717333440979322
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,fp8,0,0.00933333362142245
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,float16,0,0.010666667173306147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,fp8,0,0.05374933282534281
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.03075733284155528
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,float16,0,0.05808533231417338
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,fp8,0,0.051967998345692955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,float16,0,0.03329066683848699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,float16,0,0.033285332222779594
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,fp8,0,0.02934933453798294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,float16,0,0.033200000723203026
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,fp8,0,0.03127466638882955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,float16,0,0.03180799881617228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,float16,0,0.021018666525681812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.029493334392706554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.01974933346112569
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,float16,0,0.021216000119845074
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,float16,0,0.021290667355060577
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,fp8,0,0.019317333896954853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,fp8,0,0.054378668467203774
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,float16,0,0.02128533273935318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,fp8,0,0.01921066641807556
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.01899733394384384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,float16,0,0.021226666867733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,float16,0,0.014565333724021912
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,float16,0,0.013477332890033722
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,float16,0,0.014639999717473984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,float16,0,0.03310399999221166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,float16,0,0.013210666676362356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,float16,0,0.014368000129858652
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,fp8,0,0.030453334252039593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,float16,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,float16,0,0.010928000013033548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,fp8,0,0.00983466642598311
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,float16,0,0.010645333677530289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,float16,0,0.010751999914646149
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,fp8,0,0.009637333452701569
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,float16,0,0.01062400018175443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,float16,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,float16,0,0.008874666566650072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,float16,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,float16,0,0.009141333401203156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,float16,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,float16,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,fp8,0,0.008661333471536636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,fp8,0,0.008661333471536636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,float16,0,0.008821333448092142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,float16,0,0.00873066671192646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,float16,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,float16,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.01073066641887029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,float16,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,float16,0,0.00915733352303505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,float16,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,float16,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,float16,0,0.009088000282645226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,fp8,0,0.008746666833758354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,float16,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,float16,0,0.009178666397929192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,float16,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,float16,0,0.0545066644748052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,fp8,0,0.04885333279768626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,float16,0,0.053770666321118675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,fp8,0,0.049546668926874794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.027461332579453785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,float16,0,0.030784000953038532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,float16,0,0.05384000142415365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,fp8,0,0.04776533444722494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,fp8,0,0.027674667537212372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,float16,0,0.03126933425664902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,fp8,0,0.027082666754722595
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,float16,0,0.03133866687615713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,fp8,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,float16,0,0.018901333212852478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.01889066646496455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,float16,0,0.031167998909950256
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,fp8,0,0.018858666221300762
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,float16,0,0.01945066700379054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,fp8,0,0.019061333189407986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,float16,0,0.019167999426523846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,fp8,0,0.018933333456516266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,float16,0,0.019178666174411774
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.018901333212852478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,float16,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.012703999876976013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,float16,0,0.012863999853531519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,float16,0,0.031328000128269196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,float16,0,0.013173333058754602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,float16,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,float16,0,0.012725333372751871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,float16,0,0.0106133334338665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,float16,0,0.010773333410422007
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,fp8,0,0.027482666075229645
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,float16,0,0.010698666175206503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,float16,0,0.010768000036478043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,float16,0,0.010837333897749582
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,float16,0,0.01926933353145917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,float16,0,0.008832000195980072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,fp8,0,0.00873066671192646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,float16,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,fp8,0,0.008661333471536636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,float16,0,0.008879999940594038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,float16,0,0.00877333308259646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,float16,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,fp8,0,0.008746666833758354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,fp8,0,0.008709333216150602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.027424000203609467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,float16,0,0.009610666582981745
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,float16,0,0.008863999818762144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,fp8,0,0.009119999905427298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,float16,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,fp8,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,float16,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,float16,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,float16,0,0.009061333412925402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.008687999720374743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,float16,0,0.008767999708652496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,float16,0,0.008832000195980072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,float16,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,fp8,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,float16,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,float16,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,float16,0,0.0539680023988088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,fp8,0,0.0476746658484141
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,float16,0,0.0544053316116333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,float16,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,float16,0,0.031311998764673867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,float16,0,0.05179200073083242
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,fp8,0,0.04762666424115499
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.027295999228954315
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,float16,0,0.031386665999889374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,fp8,0,0.008725333337982496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,fp8,0,0.02722666660944621
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,fp8,0,0.027141332626342773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,float16,0,0.02997333308060964
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,float16,0,0.029765332738558452
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.027274665733178455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,float16,0,0.019199999670187633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,float16,0,0.019050666441520054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,fp8,0,0.01964266722400983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,float16,0,0.019194666296243668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,fp8,0,0.018826667219400406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,fp8,0,0.01806933308641116
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,float16,0,0.019167999426523846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,float16,0,0.019296000401178997
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,fp8,0,0.047839999198913574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,float16,0,0.01313599944114685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,float16,0,0.01313599944114685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,float16,0,0.012847999731699625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,fp8,0,0.012741333494583765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,fp8,0,0.012624000509579977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,float16,0,0.012853333105643591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,float16,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,float16,0,0.0312266672650973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,float16,0,0.010597333312034607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,float16,0,0.010768000036478043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,float16,0,0.010816000401973724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,fp8,0,0.02716800073782603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,float16,0,0.010762666662534079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,float16,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,fp8,0,0.008863999818762144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,float16,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,fp8,0,0.008853333070874214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,float16,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,float16,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,fp8,0,0.008714666590094566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,fp8,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,float16,0,0.01302933320403099
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,float16,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,float16,0,0.00877333308259646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,float16,0,0.008901333436369896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,float16,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,fp8,0,0.00877333308259646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,float16,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,fp8,0,0.008799999952316284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,float16,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.008634666601816813
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,float16,0,0.008837333569924036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,float16,0,0.010064000263810158
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,fp8,0,0.00874133345981439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,float16,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,fp8,0,0.008714666590094566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,float16,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,0,0.008687999720374743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,float16,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,0,0.008576000109314919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,0,0.011472000430027643
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.008602666358153025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,fp8,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,0,0.010682666053374609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,0,0.01699200024207433
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,0,0.016800000021855038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.015018666783968607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,0,0.0075093333919843035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,float16,0,0.008837333569924036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,0,0.006640000268816948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,0,0.011178666104873022
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,0,0.010629333555698395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,0,0.016757333030303318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.010682666053374609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,0,0.012367999802033106
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.014896000425020853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,0,0.007338666667540868
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,0,0.006842666616042455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,0,0.00874133345981439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,0,0.012416000167528788
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,0,0.006709333509206772
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,0,0.006677333265542984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,0,0.0069973332186539965
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,0,0.006725333631038666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,0,0.008613333106040955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,0,0.0069759997228781385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,0,0.014837333311637243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,0,0.008767999708652496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,0,0.008757333581646284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.007007999966541926
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,0,0.006762666627764702
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,0,0.00684799998998642
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,0,0.0069386667261521024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,0,0.0069333333522081375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,0,0.006693333387374878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.006666666517655055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,0,0.006773333375652631
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,0,0.006650666395823161
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,0,0.006650666395823161
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,0,0.006624000146985054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,0,0.007098666702707608
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.006709333509206772
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,0,0.006682666639486949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,0,0.006682666639486949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,0,0.006725333631038666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,0,0.006655999769767125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,0,0.007381333038210869
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,0,0.006709333509206772
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,0,0.007365333537260692
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,0,0.0069386667261521024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,0,0.006688000013430913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,0,0.0069759997228781385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,0,0.00702400008837382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,0,0.006645333642760913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.0069226666043202085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,0,0.006890666360656421
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,0,0.006757333253820737
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,0,0.006911999856432279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,0,0.007029333462317784
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,0,0.006655999769767125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,0,0.006858666737874349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,0,0.00697066696981589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.006853333363930385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,0,0.007034666836261749
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,0,0.006778666749596596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,0,0.006927999978264173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,0,0.0068693334857622785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,0,0.006602666651209195
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,0,0.007002666592597961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,0,0.006720000257094701
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,0,0.007055999711155891
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.0069919998447100324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,float16,0,1.4719680150349934
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,fp8,0,1.3499573071797688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,float16,0,1.551743984222412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,fp8,0,1.3506560325622559
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,float16,0,0.803610642751058
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.006864000111818314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,0.7790239651997884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,float16,0,0.850325345993042
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,fp8,0,0.7338666915893555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,float16,0,0.8023040294647217
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,fp8,0,0.7794720331827799
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,0,0.006629333520929019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.006927999978264173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,float16,0,0.8510666688283285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,0.7380959987640381
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,0.45231465498606366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,0,0.006778666749596596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,float16,0,0.5075200001398722
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,fp8,0,0.4231733481089274
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,fp8,0,0.44301335016886395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,0.4522240161895752
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,float16,0,0.29581334193547565
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,0.2691359917322795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,float16,0,0.3230560024579366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,fp8,0,0.2881706754366557
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,float16,0,0.32683199644088745
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,fp8,0,0.2882293264071147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,float16,0,0.2990399996439616
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,0.2706933418909709
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,fp8,0,0.7952480316162109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,float16,0,0.923093318939209
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,float16,0,0.8697120348612467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,float16,0,0.5236479838689169
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,fp8,0,0.8009866873423258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,0.47605868180592853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,float16,0,0.5539626677831014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,fp8,0,0.4723893404006958
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,float16,0,0.5440266529719034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,fp8,0,0.473141352335612
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,float16,0,0.5178933143615723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,float16,0,0.49718932310740155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,0.47516798973083496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,float16,0,0.3149919907251994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,0.27960000435511273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,fp8,0,0.2842506567637126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,float16,0,0.4957866668701172
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,float16,0,0.3165066639582316
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,fp8,0,0.28224533796310425
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,float16,0,0.2940000096956889
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,float16,0,0.20695465803146362
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,0.18513067563374838
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,float16,0,0.20557334025700888
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,fp8,0,0.18581332763036093
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,float16,0,0.2055466572443644
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,fp8,0,0.18515199422836304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,0.18529067436854044
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,float16,0,0.18918399016062418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,float16,0,0.669701337814331
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,fp8,0,0.574506680170695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,float16,0,0.6269546747207642
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,fp8,0,0.6139839887619019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,float16,0,0.35758399963378906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,float16,0,0.37193600336710614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,fp8,0,0.34888001283009845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,float16,0,0.35649065176645917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,fp8,0,0.34856534004211426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,float16,0,0.3596266508102417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,0.3266719977060954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,float16,0,0.24403733015060425
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,float16,0,0.3062186638514201
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,float16,0,0.24370666344960532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,fp8,0,0.21821333964665732
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.006725333631038666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,float16,0,0.2230506738026937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,fp8,0,0.21683732668558756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,0.2814133365948995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,float16,0,0.2421919902165731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,0.214026669661204
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,float16,0,0.14972266554832458
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,float16,0,0.15082666277885437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,fp8,0,0.13667200009028116
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,float16,0,0.15179199973742166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,fp8,0,0.1361120045185089
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,0.13597866892814636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,fp8,0,0.7142666975657145
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,float16,0,0.8388106822967529
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,float16,0,0.8717280228932699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,float16,0,0.4349546829859416
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,fp8,0,0.7172373135884603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,0.41843199729919434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,float16,0,0.44892267386118573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,fp8,0,0.41837334632873535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,float16,0,0.45395731925964355
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,fp8,0,0.4129279851913452
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,float16,0,0.42660800615946454
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,0.4177759885787964
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,float16,0,0.2749493320782979
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,0.2034133275349935
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,float16,0,0.2662186622619629
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,float16,0,0.5023786624272665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,fp8,0,0.24497600396474203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,float16,0,0.24990934133529663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,fp8,0,0.2434719999631246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,0.1358453333377838
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,float16,0,0.280623992284139
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,float16,0,0.16214932998021445
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,0.15813333789507547
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,float16,0,0.17626132567723593
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,float16,0,0.16044800480206808
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,fp8,0,0.15680000185966492
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,float16,0,0.15079999963442484
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,0.1461066703001658
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,float16,0,0.11168533563613892
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,0.10113599896430969
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,float16,0,0.11239467064539592
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,fp8,0,0.10288000106811523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,fp8,0,0.10213866829872131
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,float16,0,0.11177066961924235
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,0.10281599561373393
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,0.3269919951756795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,float16,0,0.5114880005518595
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,fp8,0,0.428607980410258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,float16,0,0.46765867869059247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,fp8,0,0.45949331919352215
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,float16,0,0.26501866181691486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,0.24086399873097739
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,float16,0,0.28138667345046997
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,fp8,0,0.2392586668332418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,0.2260319987932841
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,float16,0,0.26264532407124835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,fp8,0,0.2408426602681478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,float16,0,0.26450133323669434
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,0.24041599035263062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,0.24450133244196573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,float16,0,0.17243733008702597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,0.1441333293914795
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,float16,0,0.17695999145507812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,fp8,0,0.15828266739845276
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,float16,0,0.17385600010553995
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,fp8,0,0.15568000078201294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,0.15507733821868896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,float16,0,0.1728000044822693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,0.1032373309135437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,float16,0,0.11603200435638428
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,fp8,0,0.10116266210873921
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,float16,0,0.11083733042081197
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,float16,0,0.11602133512496948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,fp8,0,0.1029919981956482
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,float16,0,0.11442133784294128
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,0.10188800096511841
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,float16,0,0.08739200234413147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.07821333408355713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,float16,0,0.08663466572761536
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,fp8,0,0.07801066835721333
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,float16,0,0.08682133754094441
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,fp8,0,0.07870933413505554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.0783733328183492
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,float16,0,0.45398398240407306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,fp8,0,0.424890677134196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,float16,0,0.44809067249298096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,0.23445866505304971
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,float16,0,0.2666880091031392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,fp8,0,0.39821867148081463
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,float16,0,0.24310400088628134
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,float16,0,0.23934400081634521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,fp8,0,0.15651733676592508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,fp8,0,0.219050665696462
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,float16,0,0.2618880073229472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,float16,0,0.15934399763743082
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,float16,0,0.11517866452534993
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,0.23415466149648032
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,float16,0,0.14241066575050354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,float16,0,0.15576533476511636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,0.12737600008646646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,float16,0,0.155349334081014
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,fp8,0,0.13691199819246927
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,fp8,0,0.13774399956067404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,float16,0,0.1421440045038859
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.08966400225957234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,fp8,0,0.08933333555857341
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,0.12838932871818542
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,float16,0,0.09057600299517314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,fp8,0,0.08949333429336548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,float16,0,0.09696533282597859
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,float16,0,0.057760000228881836
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,float16,0,0.08724799752235413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,float16,0,0.06386666496594746
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,fp8,0,0.05212266743183136
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,float16,0,0.0621919979651769
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,fp8,0,0.05644266804059347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,float16,0,0.05786666770776113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.056133334835370384
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,float16,0,0.05994133154551188
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.051642666260401406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,float16,0,0.06012799839178721
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,fp8,0,0.05580799778302511
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,float16,0,0.055957332253456116
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,fp8,0,0.05539733171463013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,float16,0,0.05958933134873708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.051632001996040344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,float16,0,0.2831839919090271
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,float16,0,0.26739199956258136
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,fp8,0,0.2656480073928833
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,float16,0,0.16500266393025717
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,fp8,0,0.25962666670481366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,0.14985066652297974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,float16,0,0.1701493263244629
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,fp8,0,0.1470026671886444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,float16,0,0.0913759966691335
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,fp8,0,0.14888532956441244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,float16,0,0.15306133031845093
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.08246399958928426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,0.13923199971516928
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.056799997886021934
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,float16,0,0.09904000163078308
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,fp8,0,0.08360532919565837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,float16,0,0.0988159974416097
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,fp8,0,0.0904906690120697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,0.08989333113034566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,float16,0,0.0935093363126119
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,float16,0,0.060778667529424034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.0557226687669754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,float16,0,0.06614399949709575
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,fp8,0,0.0565226674079895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,float16,0,0.06668800115585327
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,fp8,0,0.059845333298047386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.056128000219662987
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,float16,0,0.04580266773700714
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,fp8,0,0.2365866700808207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.041509332756201424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,float16,0,0.04571199913819631
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,fp8,0,0.045221333702405296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,float16,0,0.045706664522488914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,fp8,0,0.04422933359940847
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,float16,0,0.045642669002215065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.041696002086003624
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,float16,0,0.1707680026690165
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.04142933338880539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,float16,0,0.04795200129350027
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,fp8,0,0.04385066529115041
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,float16,0,0.09089066584904988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,float16,0,0.04794666667779287
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,fp8,0,0.04377600053946177
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,float16,0,0.04387199878692627
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,float16,0,0.10141866405804952
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.043696001172065735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,0.08272533118724823
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,float16,0,0.2809706727663676
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,float16,0,0.1458026667435964
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,fp8,0,0.23763734102249146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,fp8,0,0.2546773354212443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,0.14102933804194132
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,float16,0,0.15915200114250183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,fp8,0,0.13078932960828146
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,float16,0,0.1488586664199829
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,fp8,0,0.13160000244776407
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,0.1325813333193461
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,float16,0,0.09291199843088786
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,0.07771199941635132
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,float16,0,0.083514670530955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,fp8,0,0.08169066905975342
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,float16,0,0.0860586663087209
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,fp8,0,0.07481599847475688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,float16,0,0.04420800010363261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,float16,0,0.09226133426030476
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,0.07727999985218048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,float16,0,0.055760001142819725
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.04956800242265066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,float16,0,0.05995733539263407
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,float16,0,0.0595413347085317
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,fp8,0,0.05239466826121012
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,float16,0,0.0580213318268458
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.05223466455936432
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,float16,0,0.0373279998699824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.03346666693687439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,float16,0,0.037402667105197906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,fp8,0,0.033626665671666466
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,float16,0,0.037674665451049805
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,fp8,0,0.033200000723203026
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.03140799949566523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.031680000325044
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,float16,0,0.035599999129772186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,float16,0,0.035631999373435974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,fp8,0,0.03328000009059906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,fp8,0,0.03317866722742716
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,float16,0,0.035216001172860466
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,float16,0,0.03345066557327906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,float16,0,0.034314667185147606
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.03142400085926056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,float16,0,0.061280002196629844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,float16,0,0.03526400029659271
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,fp8,0,0.03155199935038885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,fp8,0,0.031445334355036415
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,float16,0,0.03145600110292435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,float16,0,0.179584006468455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,fp8,0,0.051776001850763954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,fp8,0,0.1539306640625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,float16,0,0.18236267566680908
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,fp8,0,0.16317333777745566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,float16,0,0.033413333197434746
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,float16,0,0.09497066338857015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,float16,0,0.2752000093460083
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,float16,0,0.10521599650382996
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,0.09498133261998494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.03126399964094162
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,fp8,0,0.08545066912968953
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,float16,0,0.10246400038401286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,0.09222400188446045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,float16,0,0.09738133351008098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,float16,0,0.1455359955628713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,float16,0,0.03192000091075897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.05475200215975443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,float16,0,0.06215466558933258
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.030506665507952373
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,float16,0,0.06263466676076253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,fp8,0,0.05197333296140035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,float16,0,0.06328533093134563
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,float16,0,0.04001066585381826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,float16,0,0.03763733307520548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.03749866783618927
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,fp8,0,0.03734933336575826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,fp8,0,0.03524799893299738
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,float16,0,0.04167999823888143
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.037317333122094475
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,float16,0,0.02741866558790207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.027317332724730175
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,float16,0,0.029520000020662945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,fp8,0,0.02532266577084859
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,float16,0,0.029391999046007793
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,fp8,0,0.02741333345572154
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,float16,0,0.02735999971628189
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.027104000250498455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,float16,0,0.027029333015282948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.02717333287000656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,float16,0,0.027322667340437572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,fp8,0,0.02714666724205017
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,float16,0,0.02717333287000656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,fp8,0,0.02714666724205017
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,float16,0,0.027061333258946735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.02717333287000656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,float16,0,0.027093333502610523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,fp8,0,0.09330133597056071
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,float16,0,0.059674665331840515
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,float16,0,0.02739733209212621
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,float16,0,0.02792533238728841
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,fp8,0,0.05573866764704386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,fp8,0,0.025066666305065155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,float16,0,0.028714666763941448
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.026890667776266735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,float16,0,0.04146133363246918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,float16,0,0.17150400082270303
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,fp8,0,0.16053332885106406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,0.08736532926559448
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,float16,0,0.18374399344126383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,float16,0,0.09588266412417094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,fp8,0,0.16562133034070334
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,float16,0,0.09659733374913533
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,fp8,0,0.0874079962571462
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,float16,0,0.05804799993832906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,fp8,0,0.08690667152404785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.026869334280490875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,float16,0,0.1034986674785614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,float16,0,0.10217600067456563
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,0.0909440020720164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,fp8,0,0.02716800073782603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.05042133231957754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,fp8,0,0.05380799869696299
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.05226666728655497
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.03531199942032496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,fp8,0,0.05382933219273885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,float16,0,0.03759466608365377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,float16,0,0.05473066866397858
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,float16,0,0.03531199942032496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.05351999898751577
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,float16,0,0.058378666639328
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,float16,0,0.06159466505050659
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,fp8,0,0.03518400092919668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,fp8,0,0.032885332902272545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,float16,0,0.03545066714286804
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.033615998923778534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,fp8,0,0.022687998910744984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,float16,0,0.024682665864626568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.022863999009132385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,float16,0,0.02370133250951767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,fp8,0,0.02128533273935318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.02195200075705846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.021136000752449036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,float16,0,0.023024000227451324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,fp8,0,0.020479999482631683
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,float16,0,0.023103999594847362
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,fp8,0,0.020981334149837494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,float16,0,0.02298133323589961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.01947733387351036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.020981334149837494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,float16,0,0.02242133269707362
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,fp8,0,0.01918399954835574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,float16,0,0.022410665949185688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,fp8,0,0.020975999534130096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,float16,0,0.021194666624069214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.020986666282018025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,float16,0,0.02125866711139679
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.01918399954835574
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,float16,0,0.021018666525681812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,fp8,0,0.021029333273569744
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,float16,0,0.021136000752449036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,fp8,0,0.019968000551064808
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,float16,0,0.022111999491850536
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.01922133316596349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,float16,0,0.03772799919048945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,float16,0,0.02513066679239273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,fp8,0,0.12124799688657124
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,float16,0,0.023311999936898548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,float16,0,0.022917332748572033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,float16,0,0.12808533509572348
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,float16,0,0.02111999938885371
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,float16,0,0.07247999807198842
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,float16,0,0.07562666634718578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,fp8,0,0.06761066615581512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.06625600159168243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,float16,0,0.07553599774837494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,float16,0,0.04553066690762838
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,fp8,0,0.06482133269309998
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.06621333460013072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.038634667793909706
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,float16,0,0.07644799848397572
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,float16,0,0.043706665436426796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,float16,0,0.044495999813079834
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,float16,0,0.04493333399295807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,fp8,0,0.039333333571751915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,float16,0,0.027386667827765148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.02510933329661687
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,float16,0,0.027450665831565857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,fp8,0,0.024165332317352295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,float16,0,0.027077332139015198
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,float16,0,0.13076266646385193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,fp8,0,0.02550400048494339
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,float16,0,0.027327999472618103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.025008000433444977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.016885332763195038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,float16,0,0.01722666621208191
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,fp8,0,0.016864000509182613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,float16,0,0.01711999997496605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,float16,0,0.01711999997496605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,fp8,0,0.11981866757074992
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.016895999511082966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,fp8,0,0.014858666807413101
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,float16,0,0.01505600040157636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,float16,0,0.01509333277742068
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,fp8,0,0.01523200049996376
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,float16,0,0.01580799991885821
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,float16,0,0.015141333142916361
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.014853333433469137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,float16,0,0.01504533365368843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,fp8,0,0.03942399968703588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,float16,0,0.01515199989080429
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,fp8,0,0.014912000546852747
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.039349332451820374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,float16,0,0.01481066644191742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,float16,0,0.015135999768972397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.014912000546852747
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,float16,0,0.015098666151364645
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,fp8,0,0.014805333067973455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,float16,0,0.015210667004187902
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,float16,0,0.017263999829689663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,float16,0,0.015184000134468079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.014831999937693277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,fp8,0,0.014762666076421738
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,float16,0,0.015247999380032221
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,fp8,0,0.014815999815861383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,float16,0,0.014997333288192749
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.014970666418472925
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,float16,0,0.015141333142916361
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,float16,0,0.058693334460258484
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,fp8,0,0.051925331354141235
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,float16,0,0.05977066854635874
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,fp8,0,0.05169066786766052
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,float16,0,0.01653333380818367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,float16,0,0.03689600030581156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.031658666829268135
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,float16,0,0.03590933233499527
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,fp8,0,0.031541332602500916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,float16,0,0.03653866549332937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,fp8,0,0.03163733333349228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,float16,0,0.021002667645613354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.019167999426523846
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.03128000100453695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,float16,0,0.021295999487241108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,fp8,0,0.019050666441520054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,float16,0,0.01543466622630755
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,float16,0,0.020954666038354237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,fp8,0,0.01904533306757609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,float16,0,0.020842666427294414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,float16,0,0.013807999591032663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,float16,0,0.013056000073750814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,float16,0,0.01313599944114685
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,fp8,0,0.012719999998807907
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,float16,0,0.013050666699806849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.014853333433469137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,float16,0,0.012661332885424295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,float16,0,0.012965332716703415
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,float16,0,0.03753600021203359
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,float16,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,float16,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.01268799975514412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,float16,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,float16,0,0.014368000129858652
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,float16,0,0.012703999876976013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,float16,0,0.012778667112191519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,float16,0,0.012837332983811697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,float16,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,float16,0,0.012639999389648438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,float16,0,0.012736000120639801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,float16,0,0.013295999417702356
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,fp8,0,0.012666666259368261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.012682666381200155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,float16,0,0.012671999633312225
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,fp8,0,0.012618667135636011
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,float16,0,0.012821332861979803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,float16,0,0.014794666320085526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,float16,0,0.012928000340859095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,float16,0,0.013194666554530462
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,float16,0,0.03754666695992152
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,fp8,0,0.035071998834609985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,float16,0,0.03889599939187368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,fp8,0,0.03408000121514002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,float16,0,0.023311999936898548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.02271999915440877
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,float16,0,0.012741333494583765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,float16,0,0.023189333577950794
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,fp8,0,0.022858666876951855
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,float16,0,0.0230880007147789
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,fp8,0,0.021557333568731945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,float16,0,0.02330133318901062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,float16,0,0.01516266663869222
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.01498666654030482
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,float16,0,0.012800000607967377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.02306666721900304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,float16,0,0.015200000256299973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,fp8,0,0.014837333311637243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,float16,0,0.015135999768972397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,fp8,0,0.01479999969402949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,float16,0,0.01522133375207583
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.014912000546852747
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,float16,0,0.011018666128317514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,float16,0,0.010885333021481832
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,fp8,0,0.010634666929642359
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.009957333405812582
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,float16,0,0.01073066641887029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,fp8,0,0.010645333677530289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,float16,0,0.010751999914646149
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,fp8,0,0.010410666465759277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,float16,0,0.010757333288590113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.01003200002014637
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,float16,0,0.010597333312034607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,float16,0,0.0106133334338665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.010666667173306147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,fp8,0,0.00915733352303505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,float16,0,0.009434666484594345
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,fp8,0,0.010826667149861654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,float16,0,0.010768000036478043
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,float16,0,0.01044800008336703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,float16,0,0.009072000160813332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,fp8,0,0.008709333216150602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,float16,0,0.010175999874869982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,float16,0,0.010543999572594961
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,float16,0,0.00985599992175897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,float16,0,0.009461333354314169
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,fp8,0,0.008746666833758354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,float16,0,0.009365333244204521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,float16,0,0.010863999525705973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,float16,0,0.009178666397929192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,float16,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,float16,0,0.010703999549150467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,float16,0,0.03350399931271871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,fp8,0,0.030133334298928578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,float16,0,0.03324799984693527
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,float16,0,0.02088533341884613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.019424000134070713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,fp8,0,0.02994133283694585
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,float16,0,0.021322667598724365
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,fp8,0,0.018981333822011948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,float16,0,0.02180800090233485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,fp8,0,0.0189280000825723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,float16,0,0.014917333920796713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,float16,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,float16,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,float16,0,0.014815999815861383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,float16,0,0.013717333475748697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,float16,0,0.014794666320085526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,float16,0,0.010847999403874079
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.010858666151762009
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,fp8,0,0.01071999967098236
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,float16,0,0.01091733326514562
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,fp8,0,0.010746666540702185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,float16,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.019248000035683315
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,fp8,0,0.008645333349704742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,float16,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,float16,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,float16,0,0.010512000570694605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,float16,0,0.008869333192706108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,float16,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,float16,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,float16,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,float16,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,float16,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,float16,0,0.020960000654061634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,float16,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,float16,0,0.008805333326260248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,float16,0,0.009061333412925402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,float16,0,0.008799999952316284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.008746666833758354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,float16,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.008623999853928884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,float16,0,0.008842666943868002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,fp8,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.008767999708652496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,float16,0,0.010869332899649939
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,float16,0,0.03152533372243246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,fp8,0,0.028160000840822857
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,float16,0,0.031557333966096245
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,float16,0,0.01932266727089882
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,fp8,0,0.028522667785485584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.0191040001809597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,float16,0,0.01932266727089882
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,fp8,0,0.018794666975736618
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.010794666906197866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,fp8,0,0.01903466631968816
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,float16,0,0.019178666174411774
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,float16,0,0.013125333935022354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,float16,0,0.012703999876976013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,float16,0,0.0086666668454806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,float16,0,0.013002666334311167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,float16,0,0.010672000547250112
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,float16,0,0.010746666540702185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,float16,0,0.01071999967098236
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,float16,0,0.009103999783595404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,float16,0,0.009109333157539368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,fp8,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,float16,0,0.008799999952316284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,fp8,0,0.00874133345981439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,float16,0,0.013280000537633896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.018874666343132656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,float16,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.008656000097592672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,float16,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,fp8,0,0.008810666700204214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,float16,0,0.00922133338948091
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,float16,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,fp8,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,float16,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,fp8,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,float16,0,0.008858666444818178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.008661333471536636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,float16,0,0.008661333471536636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,fp8,0,0.008634666601816813
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,float16,0,0.02065066620707512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,float16,0,0.008858666444818178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,float16,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,float16,0,0.00916800027092298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,float16,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,fp8,0,0.008629333227872849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,float16,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,fp8,0,0.008581333483258883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,float16,0,0.008618666479984919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,float16,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,float16,0,0.010693332801262537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,float16,0,0.0312266672650973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,fp8,0,0.027488000690937042
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,fp8,0,0.027434666951497395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,float16,0,0.019061333189407986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.018976000448067982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,float16,0,0.0189280000825723
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,float16,0,0.019226666539907455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,fp8,0,0.01889066646496455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.01267733300725619
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,float16,0,0.018895999838908512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.008656000097592672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.018351999421914417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,float16,0,0.013077333569526672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,float16,0,0.013114667187134424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,float16,0,0.013061333447694778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,float16,0,0.010773333410422007
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,float16,0,0.010714666297038397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,fp8,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,float16,0,0.010629333555698395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,float16,0,0.010794666906197866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,float16,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,float16,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,float16,0,0.009130666653315226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,fp8,0,0.018960000326236088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.009114666531483332
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,float16,0,0.008879999940594038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,float16,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,float16,0,0.013130666067202887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.008789333204428354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,float16,0,0.008816000074148178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,float16,0,0.008714666590094566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,fp8,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,float16,0,0.009093333035707474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,float16,0,0.008709333216150602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,float16,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,float16,0,0.008656000097592672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,fp8,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,float16,0,0.0295413335164388
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,float16,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,fp8,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,float16,0,0.009098666409651438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,float16,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,fp8,0,0.008634666601816813
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,fp8,0,0.00855466661353906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,float16,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,float16,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,0,0.011781333635250727
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,0,0.011098666737476984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.010741333166758219
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,0,0.007322666545708974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,0,0.006704000135262807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,0,0.012335999558369318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,0,0.006895999734600385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,0,0.006837333242098491
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,0,0.008874666566650072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,0,0.007034666836261749
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,float16,0,0.008592000231146812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,0,0.008400000010927519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.0069226666043202085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,0,0.006949333474040031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,0,0.006746666505932808
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,0,0.006634666894872983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.00690133310854435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,0,0.0069226666043202085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.006704000135262807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,0,0.0069866664707660675
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,0,0.006688000013430913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,0,0.008645333349704742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,0,0.0069226666043202085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,0,0.006655999769767125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,0,0.006949333474040031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.007061333085099856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,0,0.007061333085099856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.006709333509206772
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,0,0.006645333642760913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,0,0.008101333553592363
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,0,0.006560000280539195
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,0,0.0069440001000960665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,0,0.006853333363930385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.006624000146985054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,0,0.006655999769767125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.010666667173306147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,0,0.006688000013430913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,0,0.006677333265542984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,0,0.006981333096822103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.006698666761318843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,0,0.0068853336075941724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,0,0.006698666761318843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,0,0.007386666412154834
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,0,0.006698666761318843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,0,0.006821333120266597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,0,0.0069333333522081375
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.00690133310854435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,0,0.006693333387374878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.006581333155433337
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,0,0.006704000135262807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,0,0.006671999891599019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,0,0.006591999903321266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,0,0.006751999879876773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,float16,0,0.8002879619598389
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,fp8,0,0.7705333232879639
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,0,0.006810666372378667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,float16,0,0.4954613447189331
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.006586666529377301
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,float16,0,0.48727468649546307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,fp8,0,0.4456533193588257
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,float16,0,0.4899306694666545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,0,0.006831999868154526
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,0.4487893184026082
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,float16,0,0.32225600878397626
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,0.2882240017255147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,float16,0,0.3176106611887614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.006666666517655055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,float16,0,0.3158773382504781
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,0.2887146671613057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,0.18928533792495728
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,float16,0,0.2050559918085734
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,fp8,0,0.19031999508539835
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,float16,0,0.20800532897313437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,0,0.006575999781489372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,0.18633600076039633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,float16,0,0.5173439979553223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,fp8,0,0.4695626497268677
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,float16,0,0.29181333382924396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.006624000146985054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,float16,0,0.31353066364924115
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,fp8,0,0.2653440038363139
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,float16,0,0.3125759959220886
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,0.28364266951878864
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,float16,0,0.2055786649386088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,0.18579200903574625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,float16,0,0.20419732729593912
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,fp8,0,0.18618667125701904
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,float16,0,0.1908586621284485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,0.18614399433135986
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,float16,0,0.16022933522860208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,0.1444960037867228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,float16,0,0.1606933375199636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,fp8,0,0.14620799819628397
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,0.14453867077827454
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,float16,0,0.40299733479817706
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,fp8,0,0.34726401170094806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,0.21925334135691324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,float16,0,0.240447998046875
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,fp8,0,0.2878986597061157
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,float16,0,0.24343999226888022
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,float16,0,0.2104853391647339
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,0.21725332736968994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,float16,0,0.14992533127466837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,0.13610133528709412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,float16,0,0.15054933230082193
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,fp8,0,0.1369706690311432
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,0.2775413393974304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,float16,0,0.14917866388956705
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,0.136245330174764
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,float16,0,0.13485866785049438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.12161067128181458
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,float16,0,0.13473600149154663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,fp8,0,0.12157866358757019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,float16,0,0.13366400202115378
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.12338667114575703
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,float16,0,0.45590933163960773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,0.4531946579615275
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,float16,0,0.27450132369995117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,float16,0,0.15779200196266174
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,0.24446932474772134
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,float16,0,0.2523733377456665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,fp8,0,0.2434933384259542
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,float16,0,0.24447466929753622
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,float16,0,0.25090134143829346
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,float16,0,0.17602133750915527
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,float16,0,0.1792693336804708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,fp8,0,0.14713066816329956
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,float16,0,0.175162672996521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,fp8,0,0.2196213404337565
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,0.15760533014933267
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,float16,0,0.1030560036500295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.10272000233332317
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,float16,0,0.11517332990964253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,fp8,0,0.10295466581980388
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,float16,0,0.10313600301742554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,float16,0,0.11127466956774394
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.09887466828028361
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,float16,0,0.10122133294741313
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,fp8,0,0.10083733002344768
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,float16,0,0.1090880036354065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.09370666742324829
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,float16,0,0.285861333211263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,fp8,0,0.2595360080401103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,float16,0,0.1587999959786733
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,0.15547733505566916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,float16,0,0.15849600235621134
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,fp8,0,0.14429333806037903
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,float16,0,0.17903999487559
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,0.15517866611480713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,float16,0,0.10498666763305664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,0.10217066605885823
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,float16,0,0.11583999792734782
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,fp8,0,0.09577600161234538
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,fp8,0,0.41595200697580975
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,float16,0,0.11342933773994446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,0.09573333462079366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,float16,0,0.0803466687599818
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.07843733330567677
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,float16,0,0.08481066425641377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,fp8,0,0.07858666777610779
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,float16,0,0.0803306649128596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.07276266813278198
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,float16,0,0.08387733499209087
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.07764799892902374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,float16,0,0.07904000083605449
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,fp8,0,0.07660266757011414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,0.24500266710917154
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,float16,0,0.0786186655362447
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,float16,0,0.15497600038846335
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,0.13691199819246927
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,float16,0,0.14061333735783896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,float16,0,0.25803200403849286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,fp8,0,0.13769599795341492
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,float16,0,0.15747732917467752
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.10268266995747884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,0.1276586651802063
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,float16,0,0.10091200470924377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.08893866340319316
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,float16,0,0.10081066687901814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,fp8,0,0.08961066603660583
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,float16,0,0.09083200494448344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,float16,0,0.057855998476346336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.05695466697216034
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,float16,0,0.06402133405208588
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,fp8,0,0.0533493310213089
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,float16,0,0.0580320010582606
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.05658666789531708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,float16,0,0.06027733286221822
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.05189866820971171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,float16,0,0.05996799965699514
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,fp8,0,0.05599466462930044
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,float16,0,0.060319999853769936
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.05165866514046987
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,float16,0,0.05606933434804281
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.054330666859944664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,float16,0,0.059903999169667564
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,fp8,0,0.05156800150871277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,float16,0,0.05783466498057047
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.054048001766204834
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,float16,0,0.10192533334096272
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,fp8,0,0.13929067055384317
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,0.08544533451398213
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,float16,0,0.1004960040251414
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,fp8,0,0.08379733562469482
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,0.15778133273124695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.07217066486676534
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,float16,0,0.09361599882443745
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,0.0867146650950114
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.060032000144322716
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,float16,0,0.061247999469439186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,fp8,0,0.2355039914449056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,fp8,0,0.06018133461475372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,float16,0,0.06717333197593689
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.056015998125076294
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.04568000137805939
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,float16,0,0.049866666396458946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,float16,0,0.04814933240413666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,fp8,0,0.04141333450873693
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,float16,0,0.04959466556708018
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.0452106644709905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,float16,0,0.047584002216657005
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.08410132924715678
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.04141866664091746
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,float16,0,0.04764266808827718
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,fp8,0,0.04384533564249674
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,float16,0,0.04653333127498627
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.04390933116277059
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,float16,0,0.04367466767628988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.043568000197410583
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,float16,0,0.04369066655635834
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,fp8,0,0.04383466641108195
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.041402667760849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,float16,0,0.08451732993125916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,0.0764213353395462
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,float16,0,0.0913759966691335
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,float16,0,0.15440533558527628
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,float16,0,0.16502400239308676
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,fp8,0,0.07482133309046428
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,float16,0,0.08657067020734151
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,0.07761066655317943
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.052746668457984924
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,float16,0,0.054341331124305725
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.05243200063705444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,fp8,0,0.04985066751639048
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,float16,0,0.0336053321758906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,float16,0,0.06530133386452992
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.03397866586844126
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,fp8,0,0.03142933299144109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.03338133295377096
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,float16,0,0.03724266588687897
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,float16,0,0.033546666304270424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.03329066683848699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,float16,0,0.03536533315976461
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,fp8,0,0.031173333525657654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,float16,0,0.03537066777547201
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.0332640012105306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,float16,0,0.03330666571855545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.03230933348337809
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,float16,0,0.035071998834609985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,fp8,0,0.03278933217128118
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,float16,0,0.0352906659245491
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.03193599979082743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,float16,0,0.04387199878692627
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,float16,0,0.03310399999221166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,float16,0,0.03331200033426285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,fp8,0,0.031541332602500916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,float16,0,0.03252266595760981
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.03145066648721695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,float16,0,0.10046933094660442
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,fp8,0,0.13940266768137613
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,float16,0,0.05845333139101664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,fp8,0,0.08536000053087871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.05611733098824819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,float16,0,0.05896000067392985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,float16,0,0.05773333211739858
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,fp8,0,0.0517493337392807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,float16,0,0.058373332023620605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.055733333031336464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,float16,0,0.04133866727352142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,float16,0,0.057962665955225624
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,float16,0,0.0406986673672994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.035690667728583016
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,float16,0,0.04177600145339966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,float16,0,0.02935466667016347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,float16,0,0.029205332199732464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,float16,0,0.02918400118748347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.027386667827765148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.027162666122118633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,float16,0,0.029109333952267964
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,fp8,0,0.025290665527184803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.03163733333349228
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,float16,0,0.02924799919128418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.027189334233601887
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,float16,0,0.02810666710138321
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.02717866748571396
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,float16,0,0.029157333076000214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,fp8,0,0.027130665878454845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,float16,0,0.02739199995994568
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.027263998985290527
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,float16,0,0.027701333165168762
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.027098665634791057
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,float16,0,0.028138667345046997
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.03749333322048187
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,fp8,0,0.02703999976317088
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,fp8,0,0.035536001125971474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.027119999130566914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.027162666122118633
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,fp8,0,0.027327999472618103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,float16,0,0.028746667007605236
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,float16,0,0.05983999868233999
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.0524533341328303
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,float16,0,0.09496000409126282
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,float16,0,0.061119998494784035
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,fp8,0,0.053077335158983864
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,float16,0,0.05913599828879038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,float16,0,0.037578667203585304
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.0509493350982666
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.033861334125200905
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,float16,0,0.039333333571751915
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,fp8,0,0.03494933247566223
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.03387200087308884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,float16,0,0.03730133424202601
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,float16,0,0.024293333292007446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,float16,0,0.023418667415777843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,fp8,0,0.021210665504137676
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.021509334444999695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,float16,0,0.023007998863856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.021130666136741638
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,float16,0,0.023045333723227184
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,fp8,0,0.02128533273935318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,float16,0,0.02179733415444692
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.021045332153638203
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,float16,0,0.02370133250951767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.021242665747801464
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,float16,0,0.020981334149837494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,float16,0,0.03487999985615412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,float16,0,0.021274665991465252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,float16,0,0.020986666282018025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,float16,0,0.020917333662509918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,float16,0,0.02102400114138921
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,float16,0,0.021018666525681812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,fp8,0,0.09221866726875305
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.01934933289885521
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,fp8,0,0.019317333896954853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,float16,0,0.021066665649414062
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,float16,0,0.021082667013009388
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.0195573332409064
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,float16,0,0.024373332659403484
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,float16,0,0.07105599840482076
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,float16,0,0.04437333345413208
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,fp8,0,0.02094399929046631
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.020954666038354237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.03746666759252548
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,fp8,0,0.021189334491888683
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,fp8,0,0.0395413339138031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,float16,0,0.04550399879614512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.03740799923737844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.02499733368555705
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,float16,0,0.027093333502610523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,fp8,0,0.02515733242034912
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,float16,0,0.017157333592573803
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.024986666937669117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.01672533278663953
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,fp8,0,0.016783999900023144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,float16,0,0.01711999997496605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.016879999389251072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,float16,0,0.016623999923467636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,float16,0,0.016741332908471424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,float16,0,0.01691199963291486
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.014901333798964819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,float16,0,0.0271573339899381
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,float16,0,0.01504533365368843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.01498666654030482
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,float16,0,0.014869333555301031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,fp8,0,0.014746667196353277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,float16,0,0.016832000265518825
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.014720000326633453
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,float16,0,0.0447626660267512
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,float16,0,0.014991999914248785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.01481066644191742
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,float16,0,0.015072000523408255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,fp8,0,0.014730667074521383
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,float16,0,0.01607999950647354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,float16,0,0.014912000546852747
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,float16,0,0.01515199989080429
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,float16,0,0.027482666075229645
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,fp8,0,0.01482133318980535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,float16,0,0.015087999403476715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.014789332946141561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,float16,0,0.014954666296641031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,float16,0,0.015066667149464289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,fp8,0,0.014746667196353277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,float16,0,0.015168000012636185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.014975999792416891
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,float16,0,0.026949333647886913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,float16,0,0.01692266638080279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,float16,0,0.02091199904680252
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.019050666441520054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,float16,0,0.021055998901526134
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,float16,0,0.03532266616821289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,fp8,0,0.01889066646496455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,fp8,0,0.03152533372243246
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,float16,0,0.013199999928474426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,float16,0,0.02089600016673406
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,fp8,0,0.06791466474533081
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.013194666554530462
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,float16,0,0.013749333719412485
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,float16,0,0.013104000439246496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,float16,0,0.013045333325862885
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,float16,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,float16,0,0.012698666503032049
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.01268799975514412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,float16,0,0.012906666845083237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,float16,0,0.012896000097195307
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,float16,0,0.012831999609867731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,float16,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,fp8,0,0.012757333616415659
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,float16,0,0.012703999876976013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.01267733300725619
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,float16,0,0.012847999731699625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,fp8,0,0.012725333372751871
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,float16,0,0.012784000486135483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,float16,0,0.012730666746695837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.012453333785136541
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,float16,0,0.012794667234023413
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,float16,0,0.012682666381200155
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.012256000190973282
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.01926933353145917
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.012063999970753988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,float16,0,0.01351999988158544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,fp8,0,0.012117333710193634
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.012479999413092932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,float16,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,float16,0,0.01482133318980535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,float16,0,0.023317334552605946
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,fp8,0,0.021957332889238994
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,float16,0,0.015119999647140503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,fp8,0,0.015194666882356008
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,float16,0,0.012661332885424295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,float16,0,0.015205333630243937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.014842666685581207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.010863999525705973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,float16,0,0.010965333630641302
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,fp8,0,0.010879999647537867
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,float16,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.010746666540702185
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.010485333700974783
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,fp8,0,0.010869332899649939
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,float16,0,0.010698666175206503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.010757333288590113
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,float16,0,0.01099733387430509
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,float16,0,0.01073066641887029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,float16,0,0.010954666882753372
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,float16,0,0.010650667051474253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.009125333279371262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,float16,0,0.010234666367371878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,fp8,0,0.00956266683836778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,float16,0,0.012906666845083237
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,float16,0,0.010682666053374609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.009349333122372627
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,float16,0,0.010682666053374609
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,float16,0,0.010384000216921171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,fp8,0,0.009103999783595404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,float16,0,0.009935999910036722
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,float16,0,0.010090666512648264
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,float16,0,0.009989333028594652
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,float16,0,0.010597333312034607
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,float16,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.008821333448092142
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,float16,0,0.009813333551088968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,float16,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,float16,0,0.013221333424250284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,float16,0,0.021130666136741638
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,fp8,0,0.019146667172511418
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,float16,0,0.010778666784365972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,float16,0,0.010661333799362183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,float16,0,0.014789332946141561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,float16,0,0.012858666479587555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,float16,0,0.013183999806642532
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.010357333347201347
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.010666667173306147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,float16,0,0.010565333068370819
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,fp8,0,0.010666667173306147
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,float16,0,0.010858666151762009
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.009813333551088968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.008816000074148178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,float16,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,fp8,0,0.008805333326260248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,float16,0,0.009722666814923286
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,float16,0,0.009103999783595404
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,float16,0,0.009045333291093508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.008725333337982496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.008767999708652496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,fp8,0,0.008709333216150602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,float16,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,float16,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,fp8,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,float16,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,float16,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,float16,0,0.010784000158309937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,float16,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,float16,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.008661333471536636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,float16,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,float16,0,0.01987733319401741
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,float16,0,0.01310933381319046
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,fp8,0,0.018981333822011948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,float16,0,0.013157332936922709
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,float16,0,0.013077333569526672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,float16,0,0.01089599976936976
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,float16,0,0.01080000028014183
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.00884799969693025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,float16,0,0.009061333412925402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,float16,0,0.009402666861812273
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,float16,0,0.010677333921194077
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,fp8,0,0.008901333436369896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,float16,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.008650666723648706
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,float16,0,0.008725333337982496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,float16,0,0.008725333337982496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,float16,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.008954666554927826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,fp8,0,0.012741333494583765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,fp8,0,0.008623999853928884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,float16,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,float16,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.012752000242471695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,float16,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,float16,0,0.008709333216150602
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,float16,0,0.008863999818762144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,float16,0,0.008656000097592672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,float16,0,0.009098666409651438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,fp8,0,0.00855466661353906
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,float16,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.008725333337982496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,float16,0,0.01916266605257988
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,fp8,0,0.01883200059334437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,float16,0,0.008805333326260248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,float16,0,0.012965332716703415
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,float16,0,0.008746666833758354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.012698666503032049
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,fp8,0,0.01268799975514412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,float16,0,0.012954667210578918
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.009045333291093508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,float16,0,0.0106133334338665
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,fp8,0,0.00916800027092298
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,float16,0,0.010650667051474253
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,float16,0,0.009679999823371569
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.00879466657837232
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,float16,0,0.008997333546479544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,float16,0,0.009077333534757296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,float16,0,0.009008000294367472
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,fp8,0,0.008858666444818178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,float16,0,0.008656000097592672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.008623999853928884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,float16,0,0.008912000184257826
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,fp8,0,0.008586666857202848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,float16,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,float16,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.009082666908701261
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,float16,0,0.009061333412925402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,fp8,0,0.009093333035707474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,float16,0,0.008757333581646284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,float16,0,0.01055466632048289
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,float16,0,0.00897066667675972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.008767999708652496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,fp8,0,0.008581333483258883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,float16,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,float16,0,0.008586666857202848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,float16,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,float16,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,float16,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,0,0.009050666665037474
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,0,0.009077333534757296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,0,0.008623999853928884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,0,0.008629333227872849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,float16,0,0.013125333935022354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,0,0.00877333308259646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.008725333337982496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.00666133314371109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,0,0.006874666859706243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,float16,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.00701333334048589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,0,0.006666666517655055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.006837333242098491
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,0,0.006693333387374878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,0,0.006800000245372455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.006751999879876773
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,0,0.0069759997228781385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.006821333120266597
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,0,0.007029333462317784
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,0,0.006789333497484525
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,0,0.006645333642760913
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,0,0.007061333085099856
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.0069919998447100324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,0,0.006810666372378667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,0,0.00697066696981589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,0,0.006815999746322632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.006538666784763336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,0,0.006671999891599019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.006768000001708667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,0,0.006704000135262807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,0,0.0069226666043202085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.006640000268816948
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,0,0.006981333096822103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.008687999720374743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,0,0.006624000146985054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,0,0.007071999832987785
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,0,0.006671999891599019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.006805333619316419
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,0,0.007034666836261749
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,0,0.006655999769767125
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,0,0.0069386667261521024
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,0,0.00666133314371109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.006682666639486949
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,float16,0,0.3283626635869344
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,0,0.006927999978264173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,0.2893600066502889
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,float16,0,0.3203413287798564
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,0.2886400024096171
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,float16,0,0.20897066593170166
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,0.17965332667032877
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,float16,0,0.20834134022394815
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.006741333131988843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,float16,0,0.20469866196314493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.17897067467371622
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,float16,0,0.20387732982635498
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.17806933323542276
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,float16,0,0.20546666781107584
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,0.17287466923395792
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,float16,0,0.2063573400179545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.006949333474040031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,0.17264533042907715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,float16,0,0.159578671058019
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,0.13613866766293845
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.006634666894872983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,0.13863999644915262
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,float16,0,0.14683733383814493
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.14205867052078247
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,float16,0,0.1544373333454132
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.135754664738973
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,float16,0,0.14760532975196838
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,0.13596266508102417
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,float16,0,0.13801067074139914
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,float16,0,0.1346666713555654
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.11547733346621196
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,0,0.006864000111818314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,float16,0,0.1341866652170817
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.11552000045776367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,float16,0,0.12369599938392639
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.008581333483258883
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.1218239963054657
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,float16,0,0.12366400162378947
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.11506666739781697
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,float16,0,0.17289066314697266
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,0.15635200341542563
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,float16,0,0.175653338432312
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,float16,0,0.10320533315340678
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,0.17937066157658896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.10281067093213399
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,float16,0,0.11100266377131145
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.10230400164922078
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,float16,0,0.10126399993896484
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.09490666786829631
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,float16,0,0.10288533568382263
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.10081600149472554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,float16,0,0.10730666915575664
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.09289066990216573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,float16,0,0.1009173293908437
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,float16,0,0.11441600322723389
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,0.1032960017522176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,float16,0,0.10529067118962605
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,0.10313600301742554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,float16,0,0.08685333530108134
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.0728053351243337
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,float16,0,0.07994666695594788
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.0743093341588974
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.0724373310804367
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,float16,0,0.07826666533946991
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.07226133346557617
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,float16,0,0.07838400204976399
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.07208000123500824
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,float16,0,0.14829333623250326
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,float16,0,0.07821333408355713
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.07256533205509186
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,float16,0,0.09090666969617207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.0824480007092158
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,float16,0,0.10001599788665771
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.08939199646313985
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,float16,0,0.05788266658782959
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.05264000097910563
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,float16,0,0.06235733131567637
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.05693866809209188
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,0.13539733489354452
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,float16,0,0.0557226687669754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.05186133086681366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,float16,0,0.06039999922116598
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.0554613322019577
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,float16,0,0.05973866581916809
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.05154666801293691
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,float16,0,0.0558240016301473
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.05571199953556061
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,float16,0,0.056048000852266945
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.054048001766204834
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.0517493337392807
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,float16,0,0.0666720022757848
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.058778668443361916
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,float16,0,0.06577600042025249
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.05599466462930044
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,float16,0,0.049882665276527405
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.044682666659355164
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.0415040006240209
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,float16,0,0.04770666857560476
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.043807998299598694
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,float16,0,0.04358933369318644
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.09878399968147278
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.04355733096599579
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,float16,0,0.047594666481018066
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.04382933179537455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.04368533194065094
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,float16,0,0.043866669138272606
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.041663999358812966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,float16,0,0.08449066678682964
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,float16,0,0.04382933179537455
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.04268800218900045
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,float16,0,0.0581279993057251
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.052655999859174095
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,float16,0,0.05996266504128774
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.049695998430252075
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,float16,0,0.03759466608365377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.033546666304270424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,float16,0,0.036677333215872444
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.033589333295822144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,float16,0,0.03535466641187668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.03215999901294708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,float16,0,0.03532800078392029
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.032826667030652366
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,float16,0,0.03438399980465571
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.03221333275238673
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,float16,0,0.034645333886146545
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,float16,0,0.055685331424077354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.030479999879995983
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,float16,0,0.032431999842325844
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.03159466634194056
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,float16,0,0.03194133440653483
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.03140799949566523
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,float16,0,0.032629333436489105
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,0.1462399959564209
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.031530665854612984
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,float16,0,0.03183466692765554
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.031210665901501972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,float16,0,0.041482667128245033
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,float16,0,0.0496373325586319
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.0378560001651446
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,float16,0,0.04177600145339966
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.035360001027584076
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.02717333287000656
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,float16,0,0.029472000896930695
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.027488000690937042
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.025397333006064098
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,float16,0,0.029146666328112285
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,float16,0,0.04754666487375895
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.02716800073782603
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,float16,0,0.02777066578467687
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,float16,0,0.029215998947620392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.025194667279720306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.02701866626739502
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,float16,0,0.028832000990708668
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.02532800038655599
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,float16,0,0.027434666951497395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,float16,0,0.026901334524154663
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.02500266581773758
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,float16,0,0.03654933224121729
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,float16,0,0.0352906659245491
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.0335413341720899
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.021946666141351063
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,float16,0,0.024325333535671234
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.02145066608985265
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,float16,0,0.02735999971628189
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,float16,0,0.023344000180562336
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.02090666691462199
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,float16,0,0.029045333464940388
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,float16,0,0.02332799881696701
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.021087999145189922
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,float16,0,0.023077333966890972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.025301332275072735
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,float16,0,0.0271573339899381
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,float16,0,0.021221332252025604
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.021253332495689392
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.019472000499566395
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,float16,0,0.02165333429972331
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.02720000098148982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,float16,0,0.020874666670958202
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.021114667256673176
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,float16,0,0.02126399924357732
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.019248000035683315
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,float16,0,0.021125334004561108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.02096533278624217
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,float16,0,0.020938667158285778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.03339199970165888
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.020848001043001812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,float16,0,0.02316266546646754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,float16,0,0.02741866558790207
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,float16,0,0.02701333413521449
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.02536533276240031
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.02491733431816101
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,float16,0,0.017024000485738117
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.016885332763195038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,float16,0,0.01699200024207433
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.016885332763195038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.014949332922697067
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,float16,0,0.01687466725707054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,float16,0,0.016778666526079178
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.015141333142916361
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,float16,0,0.014789332946141561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.01470400020480156
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,float16,0,0.015109332899252573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.014762666076421738
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,float16,0,0.014837333311637243
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.021295999487241108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.014682666709025701
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,float16,0,0.020938667158285778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,float16,0,0.015125333021084467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,float16,0,0.014826666563749313
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.01959466685851415
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.014831999937693277
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,float16,0,0.014933332800865173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.015087999403476715
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,float16,0,0.014853333433469137
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.014757333944241205
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,float16,0,0.014864000181357065
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,float16,0,0.01509333277742068
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,float16,0,0.01482133318980535
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,float16,0,0.01301866645614306
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,float16,0,0.013002666334311167
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,float16,0,0.012703999876976013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,float16,0,0.016997333616018295
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,float16,0,0.013056000073750814
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.012757333616415659
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,float16,0,0.01268799975514412
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,float16,0,0.015279999623696009
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.012730666746695837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,float16,0,0.012858666479587555
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,float16,0,0.012847999731699625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,float16,0,0.012815999488035837
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,float16,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.012703999876976013
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,float16,0,0.012773333738247553
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.0129120002190272
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,float16,0,0.012746666868527731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.014783999572197596
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,float16,0,0.010794666906197866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.010725333044926325
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,float16,0,0.011039999624093374
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,float16,0,0.010698666175206503
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,float16,0,0.01310933381319046
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.00901333304742972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.010821333775917688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,float16,0,0.010794666906197866
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,float16,0,0.01062400018175443
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.008837333569924036
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,float16,0,0.00895999992887179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.008976000050703684
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,float16,0,0.01033599985142549
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,float16,0,0.01441066712141037
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,float16,0,0.010373333469033241
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.008853333070874214
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,float16,0,0.009248000259200731
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,float16,0,0.010175999874869982
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,float16,0,0.010373333469033241
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,float16,0,0.012709333250919977
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,float16,0,0.008943999807039896
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.009077333534757296
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.009178666397929192
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.009061333412925402
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,float16,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,float16,0,0.010709332923094431
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.009973333527644476
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,float16,0,0.0107893335322539
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,float16,0,0.009941333283980688
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,float16,0,0.010570666442314783
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,float16,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.010687999427318573
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,float16,0,0.008863999818762144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.008714666590094566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,float16,0,0.008714666590094566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,float16,0,0.00871999996403853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.008874666566650072
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,float16,0,0.008682666967312494
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,float16,0,0.008885333314538002
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.00873066671192646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,float16,0,0.00902399979531765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,float16,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,float16,0,0.009018666421373686
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,float16,0,0.008618666479984919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.008672000219424566
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,float16,0,0.00873066671192646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,float16,0,0.009002666920423508
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,float16,0,0.010703999549150467
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.00914666677514712
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,float16,0,0.010735999792814255
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.009205333267649015
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,float16,0,0.009488000224033991
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,float16,0,0.009397333487868309
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.009088000282645226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,float16,0,0.008896000062425932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,float16,0,0.00884799969693025
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.009066666786869368
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,float16,0,0.00903466654320558
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.008661333471536636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,float16,0,0.00890666681031386
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,float16,0,0.008725333337982496
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.00877333308259646
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,float16,0,0.009056000038981438
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,float16,0,0.008986666798591614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,float16,0,0.00874133345981439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,float16,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.008597333605090777
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,float16,0,0.008687999720374743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.009029333169261614
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,float16,0,0.00860799973209699
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,float16,0,0.009088000282645226
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.008778666456540426
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,float16,0,0.010784000158309937
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,float16,0,0.010608000059922537
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,float16,0,0.00892800030608972
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.008965333302815756
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,float16,0,0.00922133338948091
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.008703999842206636
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,float16,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.008938666433095932
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,float16,0,0.00898133342464765
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,float16,0,0.008752000207702318
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.0086666668454806
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,float16,0,0.008736000085870424
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.008789333204428354
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.008639999975760778
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.008623999853928884
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.008693333094318708
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,float16,0,0.008687999720374743
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.008879999940594038
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,float16,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.008698666468262672
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,float16,0,0.00867733359336853
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,float16,0,0.008762666955590248
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.00874133345981439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,float16,0,0.008863999818762144
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.008757333581646284
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,float16,0,0.008618666479984919
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.008592000231146812
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,0,0.008613333106040955
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.006784000123540561
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.00696000022192796
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,0,0.0069226666043202085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.006815999746322632
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,0,0.006666666517655055
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.009152000149091085
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.006927999978264173
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.006735999758044879
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,0,0.006864000111818314
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,float16,0,0.00915733352303505
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,0,0.006762666627764702
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.006981333096822103
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,0,0.0069919998447100324
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.006768000001708667
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.006693333387374878
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,0,0.00690133310854435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.006837333242098491
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,0,0.006890666360656421
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,float16,0,0.008949333180983862
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.00702400008837382
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,0,0.0069759997228781385
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.006858666737874349
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,0,0.00679466687142849
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.006917333230376244
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,0,0.00697066696981589
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.006698666761318843
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,0,0.00666133314371109
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.006714666883150737
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,0,0.0068853336075941724
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.006624000146985054
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,0,0.00878399983048439
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.009039999917149544
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,0,0.006911999856432279
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.00690133310854435
VLLM,0.17.0,NVIDIA B200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,0,0.006949333474040031
