framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,fp8,0,21.647989908854168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,fp8,0,23.80010732014974
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,64,1,128,1,float16,float16,0,39.18427276611328
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,64,2,128,1,float16,float16,0,37.084431966145836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,fp8,0,24.257044474283855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,64,4,128,1,float16,float16,0,40.923929850260414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,fp8,0,23.180709838867188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,float16,0,23.85138193766276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,64,64,128,1,float16,fp8,0,12.39465077718099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,64,8,128,1,float16,float16,0,43.03165181477865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,float16,0,20.559237162272137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,64,1,128,1,float16,fp8,0,12.170144399007162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,fp8,0,11.364021301269531
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,64,2,128,1,float16,float16,0,19.69228744506836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,fp8,0,12.301114400227865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,64,4,128,1,float16,float16,0,21.213013966878254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,fp8,0,10.840763092041016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,64,8,128,1,float16,float16,0,20.87060801188151
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,float16,0,10.784309387207031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,64,64,128,1,float16,fp8,0,6.124656041463216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,float16,0,7.831573486328125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,64,1,128,1,float16,fp8,0,5.24232546488444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,fp8,0,5.227834701538086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,64,2,128,1,float16,float16,0,10.669204711914062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,fp8,0,5.321530659993489
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,64,4,128,1,float16,float16,0,10.142255783081055
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,float16,0,8.681525548299154
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,64,8,128,1,float16,fp8,0,5.856522878011067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,float16,0,3.415226618448893
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,float16,0,3.4853814442952475
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,64,1,128,1,float16,fp8,0,2.6441334088643393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,64,64,128,1,float16,fp8,0,3.1160427729288735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,float16,0,3.523733456929525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,64,2,128,1,float16,fp8,0,2.6669333775838218
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,float16,0,3.5213705698649087
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,64,4,128,1,float16,fp8,0,2.7587254842122397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,fp8,0,2.783914566040039
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,64,8,128,1,float16,float16,0,4.743391990661621
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,fp8,0,12.436421712239584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,fp8,0,12.276911417643229
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,64,2,128,1,float16,float16,0,22.5111567179362
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,64,1,128,1,float16,float16,0,26.14960479736328
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,fp8,0,13.187301635742188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,64,4,128,1,float16,float16,0,22.464576721191406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,fp8,0,14.752320607503256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,float16,0,11.039535522460938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,64,64,128,1,float16,fp8,0,7.398202896118164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,64,8,128,1,float16,float16,0,23.188880920410156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,float16,0,11.955205281575521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,64,1,128,1,float16,fp8,0,5.84938112894694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,float16,0,6.866933186848958
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,64,2,128,1,float16,fp8,0,6.147541046142578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,fp8,0,6.299343744913737
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,fp8,0,6.22981325785319
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,64,4,128,1,float16,float16,0,13.087877909342447
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,64,8,128,1,float16,float16,0,11.468053181966146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,fp8,0,3.730746587117513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,64,64,128,1,float16,float16,0,4.230138778686523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,float16,0,5.8358503977457685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,64,1,128,1,float16,fp8,0,3.6324853897094727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,float16,0,3.509018580118815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,64,2,128,1,float16,fp8,0,4.174015998840332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,fp8,0,3.206533432006836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,64,4,128,1,float16,float16,0,4.966981252034505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,float16,0,4.297520001729329
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,64,8,128,1,float16,fp8,0,3.1375732421875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,fp8,0,1.547808011372884
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,float16,0,2.0604960123697915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,64,1,128,1,float16,float16,0,1.992304007212321
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,64,64,128,1,float16,fp8,0,1.9099094072977703
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,float16,0,1.8267626762390137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,64,2,128,1,float16,fp8,0,1.7070399920145671
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,float16,0,2.042970657348633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,64,4,128,1,float16,fp8,0,1.6345547040303547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,fp8,0,1.636367956797282
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,64,8,128,1,float16,float16,0,1.9397652943929036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,fp8,0,9.933130900065104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,fp8,0,8.873418807983398
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,64,1,128,1,float16,float16,0,15.384634653727213
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,64,2,128,1,float16,float16,0,15.813690185546875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,fp8,0,8.775541305541992
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,64,4,128,1,float16,float16,0,17.876074473063152
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,fp8,0,8.9388427734375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,64,8,128,1,float16,float16,0,15.829760233561197
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,float16,0,6.58073616027832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,float16,0,5.86411730448405
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,64,1,128,1,float16,fp8,0,4.2546132405598955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,64,64,128,1,float16,fp8,0,5.393264134724935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,float16,0,4.9779253005981445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,64,2,128,1,float16,fp8,0,4.266234715779622
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,fp8,0,4.4410826365153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,64,4,128,1,float16,float16,0,5.428698857625325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,fp8,0,4.450058619181315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,64,8,128,1,float16,float16,0,6.389167785644531
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,float16,0,2.9880266189575195
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,64,64,128,1,float16,fp8,0,2.750896135965983
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,float16,0,2.5767200787862143
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,64,1,128,1,float16,fp8,0,2.3313280741373696
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,fp8,0,2.16865603129069
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,64,2,128,1,float16,float16,0,2.5769972801208496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,float16,0,2.906976064046224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,64,4,128,1,float16,fp8,0,2.288362661997477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,float16,0,2.573823928833008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,64,8,128,1,float16,fp8,0,2.6953226725260415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,float16,0,1.51146666208903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,64,64,128,1,float16,fp8,0,1.4277173678080242
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,float16,0,1.3137439886728923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,64,1,128,1,float16,fp8,0,1.1174720128377278
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,fp8,0,1.3710932731628418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,64,2,128,1,float16,float16,0,1.3187893231709797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,float16,0,1.3601759274800618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,64,4,128,1,float16,fp8,0,1.258687973022461
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,float16,0,1.3681492805480957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,64,8,128,1,float16,fp8,0,1.1817066669464111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,fp8,0,11.452239990234375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,fp8,0,12.766544342041016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,64,1,128,1,float16,float16,0,22.027435302734375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,64,2,128,1,float16,float16,0,20.260079701741535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,fp8,0,13.301199595133463
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,64,4,128,1,float16,float16,0,21.053226470947266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,fp8,0,11.8560422261556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,64,8,128,1,float16,float16,0,22.508575439453125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,float16,0,9.87832514444987
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,float16,0,11.795514424641928
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,64,64,128,1,float16,fp8,0,7.371226628621419
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,64,1,128,1,float16,fp8,0,5.8732961018880205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,fp8,0,5.945018768310547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,64,2,128,1,float16,float16,0,9.526277542114258
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,float16,0,9.411306381225586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,64,4,128,1,float16,fp8,0,6.189648310343425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,fp8,0,5.895562489827474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,fp8,0,3.7157440185546875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,64,8,128,1,float16,float16,0,10.930554707845053
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,64,64,128,1,float16,float16,0,3.8933280309041343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,float16,0,3.2235838572184243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,64,1,128,1,float16,fp8,0,2.747221310933431
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,fp8,0,2.886074701944987
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,64,2,128,1,float16,float16,0,3.4599040349324546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,fp8,0,2.915093421936035
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,64,4,128,1,float16,float16,0,4.491754531860352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,float16,0,3.3337440490722656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,64,8,128,1,float16,fp8,0,3.149669329325358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,float16,0,1.667301336924235
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,64,1,128,1,float16,fp8,0,1.4683252970377605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,float16,0,2.006378650665283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,64,64,128,1,float16,fp8,0,1.9124266306559246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,float16,0,1.9036320050557454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,64,2,128,1,float16,fp8,0,1.4552853902180989
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,float16,0,1.8750240008036296
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,64,4,128,1,float16,fp8,0,1.5508480072021484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,fp8,0,1.5558133125305176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,64,8,128,1,float16,float16,0,2.450266679128011
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,float16,0,0.9055626392364502
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,fp8,0,0.9968533515930176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,64,1,128,1,float16,fp8,0,1.1029280026753743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,64,64,128,1,float16,float16,0,1.2648639678955078
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,float16,0,0.9020906289418539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,64,2,128,1,float16,fp8,0,0.7694453398386637
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,fp8,0,0.8443520069122314
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,64,4,128,1,float16,float16,0,1.0088266531626384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,float16,0,0.9396426677703857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,64,8,128,1,float16,fp8,0,0.8193919658660889
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,fp8,0,6.51577631632487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,fp8,0,6.5064748128255205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,64,1,128,1,float16,float16,0,10.270858764648438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,64,2,128,1,float16,float16,0,13.267472585042318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,fp8,0,6.867717107137044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,64,4,128,1,float16,float16,0,11.4161008199056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,float16,0,11.524229685465494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,64,8,128,1,float16,fp8,0,7.515647888183594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,float16,0,4.751466751098633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,fp8,0,3.193141301472982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,64,64,128,1,float16,fp8,0,4.652581214904785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,64,1,128,1,float16,float16,0,5.6712799072265625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,float16,0,3.8620640436808267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,64,2,128,1,float16,fp8,0,3.4395519892374673
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,float16,0,3.9780639012654624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,64,4,128,1,float16,fp8,0,4.181407928466797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,fp8,0,3.4854561487833657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,float16,0,2.4161599477132163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,64,8,128,1,float16,float16,0,5.029130617777507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,float16,0,1.9574507077534993
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,64,64,128,1,float16,fp8,0,2.357850710550944
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,64,1,128,1,float16,fp8,0,1.669472058614095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,float16,0,1.9691252708435059
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,64,2,128,1,float16,fp8,0,2.0352320671081543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,float16,0,2.0190879503885903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,64,4,128,1,float16,fp8,0,1.8064266840616863
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,fp8,0,1.7862879435221355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,64,8,128,1,float16,float16,0,2.005530675252279
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,float16,0,1.0153546333312988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,64,1,128,1,float16,fp8,0,1.0289226373036702
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,float16,0,1.2573333581288655
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,64,64,128,1,float16,fp8,0,1.2142399946848552
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,float16,0,1.0310400327046711
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,64,2,128,1,float16,fp8,0,0.8726452986399332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,float16,0,1.0733813444773357
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,64,4,128,1,float16,fp8,0,0.9492479960123698
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,float16,0,1.066975990931193
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,64,8,128,1,float16,fp8,0,0.9321386814117432
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,float16,0,0.546127994855245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,64,1,128,1,float16,fp8,0,0.46777065594991046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,fp8,0,0.6432000001271566
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,64,64,128,1,float16,float16,0,0.6932373046875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,float16,0,0.5488106807072958
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,64,2,128,1,float16,fp8,0,0.47252265612284344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,fp8,0,0.507365345954895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,64,4,128,1,float16,float16,0,0.5811786651611328
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,fp8,0,0.5007359981536865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,64,8,128,1,float16,float16,0,0.5752586523691813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,float16,0,7.0912322998046875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,64,1,128,1,float16,fp8,0,6.861957550048828
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,float16,0,8.633989334106445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,64,2,128,1,float16,fp8,0,6.203482945760091
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,fp8,0,7.041482925415039
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,64,4,128,1,float16,float16,0,11.176539103190104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,float16,0,11.203807830810547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,64,8,128,1,float16,fp8,0,6.914805094401042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,float16,0,5.627541224161784
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,fp8,0,3.042954762776693
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,64,1,128,1,float16,float16,0,4.568458557128906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,64,64,128,1,float16,fp8,0,4.971578598022461
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,fp8,0,3.126592000325521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,64,2,128,1,float16,float16,0,3.6336374282836914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,fp8,0,3.359978675842285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,64,4,128,1,float16,float16,0,3.869978586832682
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,float16,0,3.968101183573405
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,64,8,128,1,float16,fp8,0,3.5621280670166016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,float16,0,2.5023999214172363
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,64,64,128,1,float16,fp8,0,2.524197260538737
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,float16,0,1.9351040522257488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,64,1,128,1,float16,fp8,0,1.5461066563924153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,fp8,0,1.5865707397460938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,64,2,128,1,float16,float16,0,2.159877300262451
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,float16,0,2.259893258412679
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,64,4,128,1,float16,fp8,0,1.7277226448059082
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,float16,0,1.9683787027994792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,64,8,128,1,float16,fp8,0,1.9016532897949219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,float16,0,1.3046613534291585
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,float16,0,0.9270933469136556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,64,1,128,1,float16,fp8,0,0.7987199624379476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,64,64,128,1,float16,fp8,0,1.2898720105489094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,fp8,0,0.8301440080006918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,64,2,128,1,float16,float16,0,0.9597760041554769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,fp8,0,0.9295360247294108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,64,4,128,1,float16,float16,0,1.0270079771677654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,float16,0,1.0073760350545247
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,64,8,128,1,float16,fp8,0,0.9285759925842285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,float16,0,0.4971466859181722
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,64,1,128,1,float16,fp8,0,0.43405334154764813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,fp8,0,0.6716852982838949
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,64,64,128,1,float16,float16,0,0.7173066933949789
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,float16,0,0.5062133471171061
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,64,2,128,1,float16,fp8,0,0.44231998920440674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,float16,0,0.5433866580327352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,fp8,0,0.4924000104268392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,64,4,128,1,float16,fp8,0,0.4915200074513753
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,64,8,128,1,float16,float16,0,0.5439733266830444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,float16,0,0.27686933676401776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,64,1,128,1,float16,fp8,0,0.2358293334643046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,fp8,0,0.24240533510843912
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,64,2,128,1,float16,float16,0,0.2820693254470825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,fp8,0,0.2680426637331645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,64,4,128,1,float16,float16,0,0.2977386713027954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,float16,0,0.3776533206303914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,64,64,128,1,float16,fp8,0,0.36474132537841797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,fp8,0,0.2675306598345439
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,64,8,128,1,float16,float16,0,0.2974666754404704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,fp8,0,3.6260639826456704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,64,1,128,1,float16,float16,0,4.088090578715007
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,float16,0,4.197936058044434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,64,2,128,1,float16,fp8,0,3.7107038497924805
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,float16,0,4.716640154520671
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,64,4,128,1,float16,fp8,0,4.122223854064941
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,fp8,0,4.129311879475911
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,64,8,128,1,float16,float16,0,5.112133344014485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,float16,0,2.1509973208109536
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,64,1,128,1,float16,fp8,0,1.8514399528503418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,fp8,0,3.2885119120279946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,64,64,128,1,float16,float16,0,3.1784534454345703
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,fp8,0,1.8917919794718425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,64,2,128,1,float16,float16,0,2.3005760510762534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,fp8,0,2.12827730178833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,64,4,128,1,float16,float16,0,2.3536853790283203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,float16,0,2.3561546007792153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,64,8,128,1,float16,fp8,0,2.1258400281270347
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,fp8,0,0.9483733177185059
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,64,1,128,1,float16,float16,0,1.1913653214772542
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,fp8,0,0.9743146896362305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,64,2,128,1,float16,float16,0,1.1372426350911458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,float16,0,1.6269599596659343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,64,64,128,1,float16,fp8,0,1.6679733594258626
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,fp8,0,1.0902613004048665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,64,4,128,1,float16,float16,0,1.2297120094299316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,fp8,0,1.0789013703664143
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,64,8,128,1,float16,float16,0,1.2053600152333577
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,float16,0,0.572218656539917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,64,1,128,1,float16,fp8,0,0.4982293446858724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,fp8,0,0.8591093222300211
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,64,64,128,1,float16,float16,0,0.8778293132781982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,fp8,0,0.5135306517283121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,64,2,128,1,float16,float16,0,0.5854080120722452
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,float16,0,0.6492480039596558
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,64,4,128,1,float16,fp8,0,0.5841120084126791
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,float16,0,0.6478826602300009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,64,8,128,1,float16,fp8,0,0.5737226804097494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,float16,0,0.3105493386586507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,64,1,128,1,float16,fp8,0,0.2715839942296346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,float16,0,0.45316799481709796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,64,64,128,1,float16,fp8,0,0.45134401321411133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,float16,0,0.31665066878000897
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,float16,0,0.345850666364034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,64,2,128,1,float16,fp8,0,0.28549333413441974
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,64,4,128,1,float16,fp8,0,0.31119465827941895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,float16,0,0.3506346543629964
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,64,8,128,1,float16,fp8,0,0.3193333347638448
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,float16,0,0.17182934284210205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,64,1,128,1,float16,fp8,0,0.15095466375350952
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,float16,0,0.26343466838200885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,float16,0,0.17573332786560059
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,64,64,128,1,float16,fp8,0,0.25360000133514404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,64,2,128,1,float16,fp8,0,0.15495999654134116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,fp8,0,0.16540799538294473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,64,4,128,1,float16,float16,0,0.19065600633621216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,fp8,0,0.16540799538294473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,64,8,128,1,float16,float16,0,0.19107200702031454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,float16,0,4.044831911722819
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,64,1,128,1,float16,fp8,0,3.624901453653971
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,float16,0,4.733877182006836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,64,2,128,1,float16,fp8,0,3.7530666987101235
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,float16,0,4.843546549479167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,64,4,128,1,float16,fp8,0,4.36195723215739
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,fp8,0,4.403818766276042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,64,8,128,1,float16,float16,0,5.238218625386556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,float16,0,2.0572853088378906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,64,1,128,1,float16,fp8,0,1.8059679667154949
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,float16,0,3.5155839920043945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,64,64,128,1,float16,fp8,0,3.754458745320638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,fp8,0,1.89302396774292
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,64,2,128,1,float16,float16,0,2.7242558797200522
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,fp8,0,2.1236106554667153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,64,4,128,1,float16,float16,0,2.4572906494140625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,float16,0,2.3795520464579263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,64,8,128,1,float16,fp8,0,2.247626622517904
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,float16,0,1.1717759768168132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,64,1,128,1,float16,fp8,0,0.9304533004760742
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,float16,0,1.8205599784851074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,64,64,128,1,float16,fp8,0,1.9043466250101726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,float16,0,1.0978026390075684
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,64,2,128,1,float16,fp8,0,0.9699040253957113
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,fp8,0,1.1790133317311604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,fp8,0,1.1210347016652424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,64,4,128,1,float16,float16,0,1.290618658065796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,64,8,128,1,float16,float16,0,1.2308159669240315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,fp8,0,0.4842453400293986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,64,1,128,1,float16,float16,0,0.569050669670105
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,float16,0,0.5790666739145914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,64,2,128,1,float16,fp8,0,0.50819198290507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,fp8,0,0.972389300664266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,64,64,128,1,float16,float16,0,0.9500959714253744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,float16,0,0.6643786827723185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,64,4,128,1,float16,fp8,0,0.5880266825358073
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,fp8,0,0.5877493222554525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,64,8,128,1,float16,float16,0,0.6409440040588379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,fp8,0,0.26548266410827637
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,64,1,128,1,float16,float16,0,0.3058026631673177
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,fp8,0,0.5057386557261149
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,64,64,128,1,float16,float16,0,0.5298293431599935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,float16,0,0.3076853354771932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,64,2,128,1,float16,fp8,0,0.27290666103363037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,float16,0,0.3503733476003011
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,64,4,128,1,float16,fp8,0,0.3258933424949646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,fp8,0,0.32814399401346844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,64,8,128,1,float16,float16,0,0.3554826577504476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,float16,0,0.1658399999141693
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,64,1,128,1,float16,fp8,0,0.14316266775131226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,float16,0,0.17218667268753052
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,64,2,128,1,float16,fp8,0,0.14864533146222433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,fp8,0,0.1669279932975769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,64,4,128,1,float16,float16,0,0.19169066349665323
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,fp8,0,0.27000532547632855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,64,64,128,1,float16,float16,0,0.2852906584739685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,fp8,0,0.17577066024144491
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,64,8,128,1,float16,float16,0,0.1902880072593689
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,float16,0,0.09192533294359843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,64,1,128,1,float16,fp8,0,0.08288533488909404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,float16,0,0.09476266304651897
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,fp8,0,0.14831466476122537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,64,2,128,1,float16,fp8,0,0.08493333061536153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,64,64,128,1,float16,float16,0,0.15663466850916544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,float16,0,0.10034666458765666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,64,4,128,1,float16,fp8,0,0.09345600008964539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,float16,0,0.10094400246938069
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,64,8,128,1,float16,fp8,0,0.09318400422732036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,fp8,0,2.222592035929362
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,64,1,128,1,float16,float16,0,2.5962346394856772
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,float16,0,2.6226933797200522
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,64,2,128,1,float16,fp8,0,2.3532692591349282
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,fp8,0,2.7485386530558267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,64,4,128,1,float16,float16,0,3.109381357828776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,fp8,0,2.7899627685546875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,64,8,128,1,float16,float16,0,3.013728141784668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,float16,0,1.3235092957814534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,float16,0,2.388304074605306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,64,1,128,1,float16,fp8,0,1.2154346307118733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,64,64,128,1,float16,fp8,0,2.597424030303955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,fp8,0,1.198298692703247
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,64,2,128,1,float16,float16,0,1.3592212994893391
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,float16,0,1.5685493151346843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,64,4,128,1,float16,fp8,0,1.3897333145141602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,fp8,0,1.4254186948140461
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,64,8,128,1,float16,float16,0,1.559775988260905
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,float16,0,0.6773866812388102
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,64,1,128,1,float16,fp8,0,0.5896480083465576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,float16,0,1.2566239833831787
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,64,64,128,1,float16,fp8,0,1.316159963607788
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,float16,0,0.7145013014475504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,64,2,128,1,float16,fp8,0,0.6276746590932211
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,float16,0,0.8338507016499838
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,64,4,128,1,float16,fp8,0,0.7389333248138428
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,float16,0,0.8214933077494303
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,64,8,128,1,float16,fp8,0,0.7512693405151367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,float16,0,0.3549013137817383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,64,1,128,1,float16,fp8,0,0.3120586673418681
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,float16,0,0.3781546751658122
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,float16,0,0.6776533126831055
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,64,2,128,1,float16,fp8,0,0.3302453358968099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,float16,0,0.42828798294067383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,64,64,128,1,float16,fp8,0,0.6787786483764648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,64,4,128,1,float16,fp8,0,0.40463467439015705
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,float16,0,0.43779198328653973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,64,8,128,1,float16,fp8,0,0.39261865615844727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,fp8,0,0.16866666078567505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,64,1,128,1,float16,float16,0,0.19358932971954346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,float16,0,0.20040533939997354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,64,2,128,1,float16,fp8,0,0.18145600954691568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,float16,0,0.36926400661468506
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,float16,0,0.23059733708699545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,64,4,128,1,float16,fp8,0,0.21549334128697714
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,64,64,128,1,float16,fp8,0,0.3582079807917277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,fp8,0,0.2097439964612325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,64,8,128,1,float16,float16,0,0.23213332891464233
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,float16,0,0.10480533043543498
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,64,1,128,1,float16,fp8,0,0.09385066231091817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,float16,0,0.10892800490061443
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,64,2,128,1,float16,fp8,0,0.09742400050163269
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,fp8,0,0.19130667050679526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,float16,0,0.12200533350308736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,64,64,128,1,float16,float16,0,0.20058133204778036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,64,4,128,1,float16,fp8,0,0.10940800110499065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,fp8,0,0.11084799965222676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,64,8,128,1,float16,float16,0,0.12850667039553323
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,float16,0,0.06276800235112508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,64,1,128,1,float16,fp8,0,0.058448001742362976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,float16,0,0.06402666866779327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,64,2,128,1,float16,fp8,0,0.06039999922116598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,float16,0,0.06763199965159099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,float16,0,0.11412266890207927
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,64,4,128,1,float16,fp8,0,0.06550399959087372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,64,64,128,1,float16,fp8,0,0.1130400002002716
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,float16,0,0.06853333115577698
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,64,8,128,1,float16,fp8,0,0.06644799808661143
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,fp8,0,2.3658666610717773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,64,1,128,1,float16,float16,0,2.736538569132487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,float16,0,2.796677271525065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,64,2,128,1,float16,fp8,0,2.5372586250305176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,float16,0,3.3870665232340493
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,64,4,128,1,float16,fp8,0,2.9736480712890625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,float16,0,3.3769706090291343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,64,8,128,1,float16,fp8,0,3.241898536682129
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,float16,0,1.4073492685953777
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,64,1,128,1,float16,fp8,0,1.2082346280415852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,float16,0,1.4411999384562175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,64,2,128,1,float16,fp8,0,1.2846240202585857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,float16,0,2.8055092493693032
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,64,64,128,1,float16,fp8,0,3.1392265955607095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,float16,0,1.7192266782124836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,64,4,128,1,float16,fp8,0,1.5836639404296875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,float16,0,1.683834711710612
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,64,8,128,1,float16,fp8,0,1.6624479293823242
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,float16,0,0.7090506553649902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,64,1,128,1,float16,fp8,0,0.6196320056915283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,fp8,0,1.592741330464681
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,64,64,128,1,float16,float16,0,1.4326400756835938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,float16,0,0.7458826700846354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,64,2,128,1,float16,fp8,0,0.6619306802749634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,float16,0,0.8978772958119711
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,64,4,128,1,float16,fp8,0,0.8146719932556152
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,float16,0,0.9083840052286783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,64,8,128,1,float16,fp8,0,0.8129599889119467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,float16,0,0.3716213305791219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,64,1,128,1,float16,fp8,0,0.32310400406519574
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,float16,0,0.39268799622853595
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,64,2,128,1,float16,fp8,0,0.3502720197041829
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,float16,0,0.47035733858744305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,float16,0,0.7356053193410238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,64,64,128,1,float16,fp8,0,0.8117919762929281
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,64,4,128,1,float16,fp8,0,0.4512373208999634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,fp8,0,0.44546667734781903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,64,8,128,1,float16,float16,0,0.4605120023091634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,float16,0,0.20150399208068848
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,fp8,0,0.42051732540130615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,64,64,128,1,float16,float16,0,0.429690678914388
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,64,1,128,1,float16,fp8,0,0.17682133118311563
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,float16,0,0.212826669216156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,float16,0,0.2521599928538005
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,64,2,128,1,float16,fp8,0,0.18874667088190714
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,64,4,128,1,float16,fp8,0,0.23777600129445395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,float16,0,0.2458720008532206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,64,8,128,1,float16,fp8,0,0.23898667097091675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,float16,0,0.10853866736094157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,64,1,128,1,float16,fp8,0,0.09422399600346883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,float16,0,0.11570133765538533
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,fp8,0,0.2293813427289327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,64,64,128,1,float16,float16,0,0.20381333430608115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,64,2,128,1,float16,fp8,0,0.09896533687909444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,float16,0,0.13708266615867615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,64,4,128,1,float16,fp8,0,0.12274133165677388
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,float16,0,0.13247999548912048
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,64,8,128,1,float16,fp8,0,0.1251200040181478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,float16,0,0.06163733204205831
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,64,1,128,1,float16,fp8,0,0.056847999493281044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,float16,0,0.06367999811967213
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,64,2,128,1,float16,fp8,0,0.05829333265622457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,fp8,0,0.06633066634337108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,fp8,0,0.12101333340009053
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,64,4,128,1,float16,float16,0,0.06897066533565521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,64,64,128,1,float16,float16,0,0.11353066563606262
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,float16,0,0.0690773328145345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,64,8,128,1,float16,fp8,0,0.06606400012969971
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,float16,0,0.04012266546487808
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,64,1,128,1,float16,fp8,0,0.03613866617282232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,float16,0,0.040922666589419045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,64,2,128,1,float16,fp8,0,0.03755199909210205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,fp8,0,0.06221333146095276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,fp8,0,0.04178133110205332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,64,4,128,1,float16,float16,0,0.04362666606903076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,64,64,128,1,float16,float16,0,0.06657066444555919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,fp8,0,0.04159466673930486
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,64,8,128,1,float16,float16,0,0.04378133515516917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,float16,0,2.0673707326253257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,64,1,128,1,float16,fp8,0,1.7639946937561035
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,float16,0,2.1931145985921225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,64,2,128,1,float16,fp8,0,1.9099040031433105
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,float16,0,2.8724533716837564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,64,4,128,1,float16,fp8,0,2.701290766398112
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,float16,0,2.677695910135905
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,64,8,128,1,float16,fp8,0,2.483311971028646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,float16,0,1.0483787059783936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,64,1,128,1,float16,fp8,0,0.8974613348642985
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,float16,0,2.4798080126444497
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,64,64,128,1,float16,fp8,0,2.8304052352905273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,float16,0,1.1213653087615967
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,64,2,128,1,float16,fp8,0,0.9694773356119791
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,float16,0,1.4420852661132812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,64,4,128,1,float16,fp8,0,1.345381259918213
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,float16,0,1.3710133234659831
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,64,8,128,1,float16,fp8,0,1.3661972681681316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,float16,0,0.5420533418655396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,float16,0,1.2584586938222249
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,64,64,128,1,float16,fp8,0,1.4326133728027344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,64,1,128,1,float16,fp8,0,0.4604959885279338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,float16,0,0.5793066819508871
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,64,2,128,1,float16,fp8,0,0.5167413155237833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,fp8,0,0.6431839863459269
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,64,4,128,1,float16,float16,0,0.7320906321207682
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,float16,0,0.710202693939209
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,64,8,128,1,float16,fp8,0,0.6898880004882812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,float16,0,0.2836959958076477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,fp8,0,0.7301546732584635
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,64,64,128,1,float16,float16,0,0.6493173440297445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,64,1,128,1,float16,fp8,0,0.24369599421819052
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,float16,0,0.30215466022491455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,64,2,128,1,float16,fp8,0,0.2669973373413086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,float16,0,0.3953760067621867
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,64,4,128,1,float16,fp8,0,0.370037317276001
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,float16,0,0.3713013331095378
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,64,8,128,1,float16,fp8,0,0.3469119866689046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,float16,0,0.1527253290017446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,64,1,128,1,float16,fp8,0,0.13225600123405457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,float16,0,0.16194666425387064
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,64,2,128,1,float16,fp8,0,0.14260266224543253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,float16,0,0.33511467774709064
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,64,64,128,1,float16,fp8,0,0.37699198722839355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,float16,0,0.20433066288630167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,64,4,128,1,float16,fp8,0,0.18709333737691244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,float16,0,0.20854399601618448
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,64,8,128,1,float16,fp8,0,0.19548267126083374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,float16,0,0.08627200126647949
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,64,1,128,1,float16,fp8,0,0.07262399792671204
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,fp8,0,0.07765333354473114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,64,2,128,1,float16,float16,0,0.09146133065223694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,fp8,0,0.19779733816782633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,64,64,128,1,float16,float16,0,0.17595734198888144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,fp8,0,0.09416000048319499
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,64,4,128,1,float16,float16,0,0.10913599530855815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,float16,0,0.10763200124104817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,64,8,128,1,float16,fp8,0,0.10014399886131287
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,float16,0,0.0480373352766037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,float16,0,0.050111999114354454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,64,1,128,1,float16,fp8,0,0.04186133543650309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,64,2,128,1,float16,fp8,0,0.044453332821528115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,float16,0,0.0956160028775533
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,64,64,128,1,float16,fp8,0,0.108106662829717
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,float16,0,0.0537066658337911
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,float16,0,0.05557866891225179
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,64,4,128,1,float16,fp8,0,0.05182399849096934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,64,8,128,1,float16,fp8,0,0.05223466455936432
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,float16,0,0.029461334149042766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,64,1,128,1,float16,fp8,0,0.02739733209212621
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,float16,0,0.03137599925200144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,fp8,0,0.05381333331267039
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,64,2,128,1,float16,fp8,0,0.029674666623274486
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,64,64,128,1,float16,float16,0,0.05212266743183136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,float16,0,0.03348266581694285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,64,4,128,1,float16,fp8,0,0.03350399931271871
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,fp8,0,0.033573334415753685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,64,8,128,1,float16,float16,0,0.033946665624777474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,float16,0,0.019786667078733444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,64,1,128,1,float16,fp8,0,0.019359999646743137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,float16,0,0.01929066702723503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,float16,0,0.02555199960867564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,64,2,128,1,float16,fp8,0,0.019194666296243668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,64,64,128,1,float16,fp8,0,0.033301333586374916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,fp8,0,0.02147199958562851
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,64,4,128,1,float16,float16,0,0.021546666820844013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,fp8,0,0.02141333371400833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,64,8,128,1,float16,float16,0,0.022005334496498108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,float16,0,0.8103733062744141
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,64,1,128,1,float16,fp8,0,0.7196959654490153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,float16,0,0.876746654510498
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,64,2,128,1,float16,fp8,0,0.7957759698232015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,fp8,0,1.172714630762736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,64,4,128,1,float16,float16,0,1.2077866395314534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,float16,0,1.2213066418965657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,64,8,128,1,float16,fp8,0,1.0889546871185303
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,float16,0,0.4187413454055786
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,fp8,0,1.3397119839986165
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,64,64,128,1,float16,float16,0,1.1405920187632244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,64,1,128,1,float16,fp8,0,0.36902399857838947
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,float16,0,0.46482133865356445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,64,2,128,1,float16,fp8,0,0.42822933197021484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,float16,0,0.6127200126647949
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,64,4,128,1,float16,fp8,0,0.560421347618103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,float16,0,0.5955679814020792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,64,8,128,1,float16,fp8,0,0.6158506472905477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,float16,0,0.21983466545740762
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,float16,0,0.5808853308359782
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,64,64,128,1,float16,fp8,0,0.6805706818898519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,64,1,128,1,float16,fp8,0,0.19451733430226645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,float16,0,0.23664534091949463
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,64,2,128,1,float16,fp8,0,0.22184000412623087
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,float16,0,0.3150239984194438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,64,4,128,1,float16,fp8,0,0.3198240001996358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,float16,0,0.3036106626192729
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,64,8,128,1,float16,fp8,0,0.29180266459782916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,fp8,0,0.10823466380437215
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,64,1,128,1,float16,float16,0,0.1183626651763916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,fp8,0,0.3511679967244466
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,64,64,128,1,float16,float16,0,0.3009013334910075
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,float16,0,0.12628799676895142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,64,2,128,1,float16,fp8,0,0.1169653336207072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,float16,0,0.17910399039586386
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,64,4,128,1,float16,fp8,0,0.16547200083732605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,float16,0,0.17219199736913046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,64,8,128,1,float16,fp8,0,0.16980266571044922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,float16,0,0.06506133576234181
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,64,1,128,1,float16,fp8,0,0.055946667989095054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,float16,0,0.07077333331108093
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,64,2,128,1,float16,fp8,0,0.06111466884613037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,float16,0,0.0881866713364919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,float16,0,0.15901333093643188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,64,64,128,1,float16,fp8,0,0.18180267016092935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,64,4,128,1,float16,fp8,0,0.07982400059700012
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,float16,0,0.08717866738637288
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,64,8,128,1,float16,fp8,0,0.09041066964467366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,float16,0,0.03775999943415324
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,float16,0,0.0395413339138031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,64,1,128,1,float16,fp8,0,0.03573866685231527
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,64,2,128,1,float16,fp8,0,0.03763733307520548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,float16,0,0.10076266527175903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,64,64,128,1,float16,fp8,0,0.10075199604034424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,float16,0,0.04566933214664459
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,64,4,128,1,float16,fp8,0,0.04567466676235199
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,float16,0,0.04595733185609182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,64,8,128,1,float16,fp8,0,0.0458186666170756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,float16,0,0.023738667368888855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,64,1,128,1,float16,fp8,0,0.021498667697111767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,float16,0,0.025120000044504803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,64,2,128,1,float16,fp8,0,0.023530667026837666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,fp8,0,0.04771199822425842
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,64,64,128,1,float16,float16,0,0.0517493337392807
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,float16,0,0.027434666951497395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,float16,0,0.02743999908367793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,64,4,128,1,float16,fp8,0,0.027242665489514668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,64,8,128,1,float16,fp8,0,0.029311999678611755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,float16,0,0.017114666601022083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,64,1,128,1,float16,fp8,0,0.016842667013406754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,float16,0,0.017029333859682083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,64,2,128,1,float16,fp8,0,0.016917333006858826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,float16,0,0.019018666197856266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,fp8,0,0.029930666089057922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,64,4,128,1,float16,fp8,0,0.01912533367673556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,64,64,128,1,float16,float16,0,0.023386667172114056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,float16,0,0.01897066707412402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,64,8,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,float16,0,0.015317333241303762
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,64,1,128,1,float16,fp8,0,0.015386667102575302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,fp8,0,0.021482666333516438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,float16,0,0.015226667126019796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,64,64,128,1,float16,float16,0,0.019258666783571243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,64,2,128,1,float16,fp8,0,0.015141333142916361
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,float16,0,0.01515199989080429
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,float16,0,0.016341333587964375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,64,4,128,1,float16,fp8,0,0.016938666502634685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,64,8,128,1,float16,fp8,0,0.016997333616018295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,float16,0,0.49644800027211505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,64,1,128,1,float16,fp8,0,0.4457333485285441
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,float16,0,0.5331466595331827
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,64,2,128,1,float16,fp8,0,0.48983999093373615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,float16,0,0.6832160154978434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,64,4,128,1,float16,fp8,0,0.6213653484980265
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,float16,0,0.687712033589681
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,float16,0,0.2614186604817708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,64,8,128,1,float16,fp8,0,0.6854293346405029
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,float16,0,0.6185653209686279
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,64,64,128,1,float16,fp8,0,0.7193706830342611
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,64,1,128,1,float16,fp8,0,0.23041599988937378
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,float16,0,0.2779200077056885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,64,2,128,1,float16,fp8,0,0.2576533357302348
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,float16,0,0.360640009244283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,64,4,128,1,float16,fp8,0,0.3414986530939738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,float16,0,0.3457706769307454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,64,8,128,1,float16,fp8,0,0.33480532964070636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,float16,0,0.13942933082580566
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,float16,0,0.3174346685409546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,64,64,128,1,float16,fp8,0,0.36739198366800946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,64,1,128,1,float16,fp8,0,0.12519466876983643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,float16,0,0.14702933033307394
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,float16,0,0.19076265891393027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,64,2,128,1,float16,fp8,0,0.1390666663646698
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,64,4,128,1,float16,fp8,0,0.17799999316533408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,float16,0,0.192138671875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,64,8,128,1,float16,fp8,0,0.18050134181976318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,float16,0,0.07542933523654938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,float16,0,0.08205866813659668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,fp8,0,0.192522664864858
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,64,1,128,1,float16,fp8,0,0.06628799935181935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,64,64,128,1,float16,float16,0,0.16768000523249307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,64,2,128,1,float16,fp8,0,0.0724373310804367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,float16,0,0.09894933303197224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,64,4,128,1,float16,fp8,0,0.09381866455078125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,float16,0,0.09882666667302449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,64,8,128,1,float16,fp8,0,0.09530133008956909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,float16,0,0.04188266893227895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,64,1,128,1,float16,fp8,0,0.037647999823093414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,float16,0,0.04399466514587402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,64,2,128,1,float16,fp8,0,0.03976000100374222
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,float16,0,0.049679999550183616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,fp8,0,0.10309333602587382
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,64,64,128,1,float16,float16,0,0.08931733171145122
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,64,4,128,1,float16,fp8,0,0.04779199759165446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,float16,0,0.051813334226608276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,64,8,128,1,float16,fp8,0,0.047877331574757896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,float16,0,0.027450665831565857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,64,1,128,1,float16,fp8,0,0.024671999116738636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,float16,0,0.029258665939172108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,fp8,0,0.049733335773150124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,64,2,128,1,float16,fp8,0,0.02571200082699458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,64,64,128,1,float16,float16,0,0.05453333258628845
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,float16,0,0.03133333226044973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,64,4,128,1,float16,fp8,0,0.02962133288383484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,float16,0,0.029487999776999157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,64,8,128,1,float16,fp8,0,0.03014933317899704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,float16,0,0.01735466718673706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,64,1,128,1,float16,fp8,0,0.017370666066805523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,fp8,0,0.0313226655125618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,float16,0,0.017743999759356182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,64,2,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,64,64,128,1,float16,float16,0,0.025205334027608235
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,float16,0,0.019130667050679524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,64,4,128,1,float16,fp8,0,0.019402666638294857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,float16,0,0.019359999646743137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,64,8,128,1,float16,fp8,0,0.019706666469573975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,fp8,0,0.01333333303531011
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,64,1,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,float16,0,0.01313599944114685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,64,2,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,float16,0,0.013258667041858038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,float16,0,0.01664000004529953
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,64,4,128,1,float16,fp8,0,0.014981333166360855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,64,64,128,1,float16,fp8,0,0.01950399950146675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,float16,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,64,8,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,float16,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,64,1,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,float16,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,64,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,float16,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,64,4,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,64,64,128,1,float16,float16,0,0.01522133375207583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,64,8,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,fp8,0,0.3747200171152751
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,64,1,128,1,float16,float16,0,0.43592532475789386
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,float16,0,0.45681599775950116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,fp8,0,0.4723200003306071
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,64,4,128,1,float16,float16,0,0.5312106609344482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,float16,0,0.5407040119171143
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,64,2,128,1,float16,fp8,0,0.39503467082977295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,float16,0,0.2266826629638672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,64,8,128,1,float16,fp8,0,0.4719093243281047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,float16,0,0.44545598824818927
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,64,64,128,1,float16,fp8,0,0.43483201662699383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,64,1,128,1,float16,fp8,0,0.193557341893514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,float16,0,0.23600532611211142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,float16,0,0.274181326230367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,64,2,128,1,float16,fp8,0,0.20440000295639038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,64,4,128,1,float16,fp8,0,0.2513013283411662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,float16,0,0.2729439934094747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,64,8,128,1,float16,fp8,0,0.256602664788564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,float16,0,0.1204746663570404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,float16,0,0.23161067565282187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,64,64,128,1,float16,fp8,0,0.23178666830062866
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,64,1,128,1,float16,fp8,0,0.10169066985448201
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,float16,0,0.12612799803415933
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,fp8,0,0.12769066294034323
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,64,2,128,1,float16,fp8,0,0.10753066341082256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,64,4,128,1,float16,float16,0,0.14407466848691305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,float16,0,0.14273599783579508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,64,8,128,1,float16,fp8,0,0.12915199995040894
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,float16,0,0.06408533453941345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,64,1,128,1,float16,fp8,0,0.055642664432525635
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,float16,0,0.06485866506894429
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,fp8,0,0.11958400408426921
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,64,64,128,1,float16,float16,0,0.12111999591191609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,64,2,128,1,float16,fp8,0,0.056703999638557434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,float16,0,0.0705973356962204
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,64,4,128,1,float16,fp8,0,0.06427733103434245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,float16,0,0.07253866891066234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,64,8,128,1,float16,fp8,0,0.06629866858323415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,float16,0,0.037674665451049805
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,64,1,128,1,float16,fp8,0,0.03201599915822347
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,float16,0,0.03858133405447006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,64,2,128,1,float16,fp8,0,0.03347733368476232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,float16,0,0.0642133355140686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,float16,0,0.04166933397452036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,64,64,128,1,float16,fp8,0,0.0602400004863739
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,64,4,128,1,float16,fp8,0,0.03756800045569738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,float16,0,0.04171733558177948
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,64,8,128,1,float16,fp8,0,0.03867200016975403
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,fp8,0,0.023215999205907185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,64,1,128,1,float16,float16,0,0.024154665569464367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,float16,0,0.024165332317352295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,fp8,0,0.0358240008354187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,64,2,128,1,float16,fp8,0,0.023370665808518726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,float16,0,0.025472000241279602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,64,64,128,1,float16,float16,0,0.029968000948429108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,64,4,128,1,float16,fp8,0,0.02325333406527837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,float16,0,0.025610665480295818
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,64,8,128,1,float16,fp8,0,0.025562666356563568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,fp8,0,0.016586666305859882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,64,1,128,1,float16,float16,0,0.01700266698996226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,float16,0,0.01695466662446658
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,64,2,128,1,float16,fp8,0,0.01706133286158244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,float16,0,0.01725333308180173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,64,4,128,1,float16,fp8,0,0.016800000021855038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,fp8,0,0.02276266614596049
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,float16,0,0.018063999712467194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,64,64,128,1,float16,float16,0,0.01933866615096728
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,64,8,128,1,float16,fp8,0,0.0169813334941864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,64,1,128,1,float16,fp8,0,0.013290667285521826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,64,2,128,1,float16,fp8,0,0.013232000172138214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,64,4,128,1,float16,fp8,0,0.013253333667914072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,64,64,128,1,float16,float16,0,0.015008000036080679
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,64,8,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,fp8,0,0.012784000486135483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,64,1,128,1,float16,float16,0,0.012805332740147909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,float16,0,0.012837332983811697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,64,2,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,float16,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,fp8,0,0.017194667210181553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,64,64,128,1,float16,float16,0,0.015311999867359797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,64,4,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,float16,0,0.012800000607967377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,64,8,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,float16,0,0.41260266304016113
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,64,1,128,1,float16,fp8,0,0.33508801460266113
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,fp8,0,0.34432534376780194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,64,2,128,1,float16,float16,0,0.42016534010569256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,fp8,0,0.3877120018005371
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,64,4,128,1,float16,float16,0,0.45842134952545166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,float16,0,0.4572480122248332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,64,8,128,1,float16,fp8,0,0.4015413522720337
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,float16,0,0.21242666244506836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,fp8,0,0.2932906746864319
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,64,64,128,1,float16,float16,0,0.3230773409207662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,64,1,128,1,float16,fp8,0,0.17133333285649618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,float16,0,0.237664004166921
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,float16,0,0.22183465957641602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,64,2,128,1,float16,fp8,0,0.18159466981887817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,64,4,128,1,float16,fp8,0,0.20061333974202475
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,float16,0,0.23752532402674356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,64,8,128,1,float16,fp8,0,0.20121600230534872
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,fp8,0,0.09030933181444804
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,64,1,128,1,float16,float16,0,0.1113813320795695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,float16,0,0.16676799456278482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,float16,0,0.11409067114194234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,64,64,128,1,float16,fp8,0,0.1604426701863607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,64,2,128,1,float16,fp8,0,0.09328533212343852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,fp8,0,0.09994666775067647
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,float16,0,0.12140799562136333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,64,4,128,1,float16,float16,0,0.11776000261306763
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,float16,0,0.06053866446018219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,64,8,128,1,float16,fp8,0,0.10171733299891154
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,64,1,128,1,float16,fp8,0,0.049839998284975685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,fp8,0,0.07858666777610779
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,float16,0,0.062314664324124656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,64,64,128,1,float16,float16,0,0.09177066882451375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,64,2,128,1,float16,fp8,0,0.05193600058555603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,float16,0,0.06429333488146464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,64,4,128,1,float16,fp8,0,0.05615466833114624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,float16,0,0.06417066852251689
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,64,8,128,1,float16,fp8,0,0.0558240016301473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,fp8,0,0.031082667410373688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,64,1,128,1,float16,float16,0,0.03554133325815201
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,float16,0,0.035487999518712364
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,64,2,128,1,float16,fp8,0,0.031231999397277832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,float16,0,0.042490666111310325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,float16,0,0.036650667587916054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,64,64,128,1,float16,fp8,0,0.04376000165939331
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,64,4,128,1,float16,fp8,0,0.03325333446264267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,float16,0,0.03709866603215536
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,64,8,128,1,float16,fp8,0,0.03314133236805598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,float16,0,0.02306666721900304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,float16,0,0.023237332701683044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,64,1,128,1,float16,fp8,0,0.02107200026512146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,64,2,128,1,float16,fp8,0,0.021114667256673176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,fp8,0,0.027642667293548584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,64,64,128,1,float16,float16,0,0.025610665480295818
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,float16,0,0.023269332945346832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,64,4,128,1,float16,fp8,0,0.02117866774400075
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,float16,0,0.023333333432674408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,64,8,128,1,float16,fp8,0,0.021130666136741638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,float16,0,0.017045332739750545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,64,1,128,1,float16,fp8,0,0.01629866659641266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,float16,0,0.017055999487638474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,64,2,128,1,float16,fp8,0,0.016234666109085083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,fp8,0,0.019050666441520054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,fp8,0,0.017077332983414333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,64,64,128,1,float16,float16,0,0.018719999740521114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,64,4,128,1,float16,float16,0,0.017045332739750545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,fp8,0,0.016858667135238647
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,64,8,128,1,float16,float16,0,0.017045332739750545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,float16,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,64,1,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,64,2,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,float16,0,0.012944000462690989
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,float16,0,0.015050667027632395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,64,64,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,64,4,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,float16,0,0.013141332815090815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,64,8,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,float16,0,0.012842666357755661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,float16,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,64,1,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,64,2,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,float16,0,0.013034666577974955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,64,4,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,fp8,0,0.017103999853134155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,64,64,128,1,float16,float16,0,0.015210667004187902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,64,8,128,1,float16,float16,0,0.012794667234023413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,float16,0,0.4010560115178426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,float16,0,0.40697598457336426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,64,2,128,1,float16,fp8,0,0.3221919933954875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,64,1,128,1,float16,fp8,0,0.3186239997545878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,float16,0,0.4217333396275838
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,64,4,128,1,float16,fp8,0,0.3432533343633016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,float16,0,0.4198613166809082
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,64,8,128,1,float16,fp8,0,0.34147198994954425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,fp8,0,0.1611840029557546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,64,1,128,1,float16,float16,0,0.20730666319529215
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,fp8,0,0.22636266549428305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,64,64,128,1,float16,float16,0,0.25863466660181683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,float16,0,0.2086826761563619
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,64,2,128,1,float16,fp8,0,0.1625546713670095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,float16,0,0.22110400597254434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,64,4,128,1,float16,fp8,0,0.17096000909805298
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,fp8,0,0.1725333333015442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,64,8,128,1,float16,float16,0,0.21818133195241293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,float16,0,0.10781332850456238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,64,1,128,1,float16,fp8,0,0.08701333403587341
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,float16,0,0.141077329715093
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,64,64,128,1,float16,fp8,0,0.12426132957140605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,float16,0,0.1099626620610555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,64,2,128,1,float16,fp8,0,0.08931199709574382
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,fp8,0,0.09119466940561931
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,64,4,128,1,float16,float16,0,0.10969066619873047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,float16,0,0.11219200491905212
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,64,8,128,1,float16,fp8,0,0.09102400143941243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,float16,0,0.05946133534113566
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,float16,0,0.0724373310804367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,64,64,128,1,float16,fp8,0,0.06244266529877981
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,64,1,128,1,float16,fp8,0,0.04786666731039683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,fp8,0,0.04984533290068308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,float16,0,0.060720001657803856
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,64,2,128,1,float16,float16,0,0.058090666929880776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,64,4,128,1,float16,fp8,0,0.05192000170548757
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,float16,0,0.06028800209363302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,64,8,128,1,float16,fp8,0,0.050581331054369606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,float16,0,0.0353973334034284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,64,1,128,1,float16,fp8,0,0.031504000226656594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,float16,0,0.035562666753927864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,float16,0,0.03777066618204117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,float16,0,0.035573333501815796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,64,2,128,1,float16,fp8,0,0.031530665854612984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,64,64,128,1,float16,fp8,0,0.03701333453257879
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,64,4,128,1,float16,fp8,0,0.03126933425664902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,float16,0,0.035605333745479584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,64,8,128,1,float16,fp8,0,0.03146666785081228
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,float16,0,0.023034666975339253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,float16,0,0.023018665611743927
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,64,1,128,1,float16,fp8,0,0.021744000415007275
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,64,2,128,1,float16,fp8,0,0.021269333859284718
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,fp8,0,0.025290665527184803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,float16,0,0.023402666052182514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,64,64,128,1,float16,float16,0,0.025221332907676697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,64,4,128,1,float16,fp8,0,0.021162666380405426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,float16,0,0.023130667706330616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,64,8,128,1,float16,fp8,0,0.02146666745344798
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,float16,0,0.01603200038274129
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,64,1,128,1,float16,fp8,0,0.015365333606799444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,float16,0,0.01695999999841054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,64,2,128,1,float16,fp8,0,0.015263999501864115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,float16,0,0.016997333616018295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,float16,0,0.01775466650724411
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,64,64,128,1,float16,fp8,0,0.01923199991385142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,64,4,128,1,float16,fp8,0,0.015365333606799444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,float16,0,0.017136000096797943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,64,8,128,1,float16,fp8,0,0.014975999792416891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,float16,0,0.013274667163689932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,float16,0,0.012837332983811697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,64,1,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,64,2,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,float16,0,0.012960000584522883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,64,4,128,1,float16,fp8,0,0.01321600005030632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,64,64,128,1,float16,float16,0,0.015072000523408255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,float16,0,0.013045333325862885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,64,8,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,float16,0,0.012362666428089142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,64,1,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,float16,0,0.012597333639860153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,64,2,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,fp8,0,0.016906666258970898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,64,64,128,1,float16,float16,0,0.014949332922697067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,64,4,128,1,float16,fp8,0,0.013141332815090815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,float16,0,0.012794667234023413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,64,8,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,0,0.02164799968401591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,0,0.01937066639463107
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,0,0.027679999669392902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,0,0.025621332228183746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,float16,0,0.0995306670665741
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,64,64,128,1,float16,fp8,0,0.07849066456158955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,0,0.016549333930015564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,0,0.01904533306757609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,0,0.01766933376590411
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,float16,0,0.05830933153629303
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,64,64,128,1,float16,fp8,0,0.04389866689840952
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,0,0.015034666905800501
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,0,0.014975999792416891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,fp8,0,0.029391999046007793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,64,64,128,1,float16,float16,0,0.029418667157491047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,0,0.011173332730929056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,0,0.012810666114091873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,0,0.010890666395425797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,float16,0,0.021210665504137676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,64,64,128,1,float16,fp8,0,0.021168000996112823
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,0,0.010768000036478043
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,0,0.010794666906197866
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,0,0.010703999549150467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,fp8,0,0.01724799970785777
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,64,64,128,1,float16,float16,0,0.015285332997639975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,0,0.010869332899649939
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,0,0.011231999844312668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,0,0.010725333044926325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,0,0.010751999914646149
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,fp8,0,0.01523200049996376
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,64,64,128,1,float16,float16,0,0.013034666577974955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,0,0.010816000401973724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,0,0.010709332923094431
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,0,0.010629333555698395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,0,0.010911999891201654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,64,64,128,1,float16,fp8,0,0.015194666882356008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,0,0.009450666606426239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,0,0.010661333799362183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,0,0.011146667102972666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,64,64,128,1,float16,float16,0,0.012773333738247553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,0,0.009813333551088968
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,0,0.008890666688481966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,fp8,0,16.92898686726888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,fp8,0,17.866357167561848
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,48,2,128,1,float16,float16,0,27.906661987304688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,48,1,128,1,float16,float16,0,28.2849858601888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,fp8,0,18.52837371826172
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,48,4,128,1,float16,float16,0,29.654683430989582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,fp8,0,16.835535685221355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,float16,0,18.23311996459961
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,48,8,128,1,float16,float16,0,30.827540079752605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,48,48,128,1,float16,fp8,0,10.24886957804362
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,float16,0,15.695109049479166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,48,1,128,1,float16,fp8,0,9.466853459676107
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,fp8,0,8.548885345458984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,48,2,128,1,float16,float16,0,15.30734380086263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,float16,0,15.613749186197916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,48,4,128,1,float16,fp8,0,10.018320083618164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,fp8,0,8.735237121582031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,fp8,0,4.735552151997884
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,48,8,128,1,float16,float16,0,15.483749389648438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,48,48,128,1,float16,float16,0,5.186469395955403
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,float16,0,5.035824139912923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,48,1,128,1,float16,fp8,0,4.030943870544434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,fp8,0,3.984522819519043
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,48,2,128,1,float16,float16,0,7.294458389282227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,fp8,0,4.238954544067383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,48,4,128,1,float16,float16,0,7.442464192708333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,float16,0,5.8375199635823565
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,48,8,128,1,float16,fp8,0,4.164186795552571
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,float16,0,2.7075840632120767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,48,48,128,1,float16,fp8,0,2.422447999318441
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,float16,0,3.418922742207845
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,48,1,128,1,float16,fp8,0,2.0469279289245605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,float16,0,2.4646132787068686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,48,2,128,1,float16,fp8,0,2.070261319478353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,fp8,0,2.125706672668457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,48,4,128,1,float16,float16,0,2.594005266825358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,float16,0,2.64684263865153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,48,8,128,1,float16,fp8,0,2.1586880683898926
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,fp8,0,9.32475725809733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,fp8,0,9.336373647054037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,48,1,128,1,float16,float16,0,17.08136494954427
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,48,2,128,1,float16,float16,0,18.760037740071613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,fp8,0,9.690746943155924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,48,4,128,1,float16,float16,0,17.036404927571613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,float16,0,8.01362164815267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,fp8,0,11.359408060709635
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,48,8,128,1,float16,float16,0,18.25540288289388
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,fp8,0,4.517631848653157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,48,48,128,1,float16,fp8,0,5.660762786865234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,48,1,128,1,float16,float16,0,9.6102294921875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,float16,0,6.684965133666992
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,48,2,128,1,float16,fp8,0,4.568122545878093
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,float16,0,6.5743147532145185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,48,4,128,1,float16,fp8,0,5.398752212524414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,fp8,0,4.864826520284017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,48,8,128,1,float16,float16,0,6.968533198038737
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,fp8,0,2.861311912536621
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,48,48,128,1,float16,float16,0,3.1117172241210938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,float16,0,2.745567957560221
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,48,1,128,1,float16,fp8,0,2.3690080642700195
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,float16,0,2.8290506998697915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,48,2,128,1,float16,fp8,0,2.354981263478597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,float16,0,2.8264052073160806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,48,4,128,1,float16,fp8,0,2.478661378224691
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,fp8,0,2.472815990447998
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,48,8,128,1,float16,float16,0,3.228010813395182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,float16,0,1.6295092900594075
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,48,48,128,1,float16,fp8,0,1.5046720504760742
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,float16,0,1.4129120508829753
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,48,1,128,1,float16,fp8,0,1.287823994954427
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,float16,0,1.4278772672017415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,48,2,128,1,float16,fp8,0,1.2363733450571697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,float16,0,1.4925227165222168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,48,4,128,1,float16,fp8,0,1.297925313313802
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,float16,0,1.488485336303711
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,48,8,128,1,float16,fp8,0,1.2873706817626953
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,fp8,0,7.467397054036458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,fp8,0,6.498122533162435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,48,1,128,1,float16,float16,0,10.621301651000977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,48,2,128,1,float16,float16,0,9.615253448486328
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,fp8,0,6.8904158274332685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,fp8,0,7.016901016235352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,48,4,128,1,float16,float16,0,13.279733022054037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,48,8,128,1,float16,float16,0,11.420548756917318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,float16,0,4.4285173416137695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,48,48,128,1,float16,fp8,0,4.148736000061035
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,float16,0,4.375311851501465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,48,1,128,1,float16,fp8,0,3.2398398717244468
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,float16,0,3.822688102722168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,48,2,128,1,float16,fp8,0,3.3431679407755532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,fp8,0,3.4562241236368814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,48,4,128,1,float16,float16,0,6.136533101399739
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,fp8,0,3.5684000651041665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,float16,0,2.2581493059794107
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,48,8,128,1,float16,float16,0,5.11354128519694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,48,48,128,1,float16,fp8,0,2.120426654815674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,float16,0,1.911146640777588
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,48,1,128,1,float16,fp8,0,1.6727360089619954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,float16,0,2.2358880043029785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,48,2,128,1,float16,fp8,0,1.6802825927734375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,fp8,0,1.8101600011189778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,48,4,128,1,float16,float16,0,2.818197250366211
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,float16,0,2.023578643798828
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,48,8,128,1,float16,fp8,0,1.7658294041951497
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,float16,0,1.0352160135904949
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,float16,0,1.1881813208262126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,48,1,128,1,float16,fp8,0,0.8959733645121256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,48,48,128,1,float16,fp8,0,1.6658080418904622
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,float16,0,1.04259196917216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,48,2,128,1,float16,fp8,0,0.9036320050557455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,float16,0,1.1101866563161213
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,48,4,128,1,float16,fp8,0,0.9397546450297037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,float16,0,1.0753280321757
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,48,8,128,1,float16,fp8,0,0.9517866770426432
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,fp8,0,8.765962600708008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,fp8,0,8.601930618286133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,48,2,128,1,float16,float16,0,15.624768575032553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,48,1,128,1,float16,float16,0,17.90881093343099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,fp8,0,9.115599950154623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,48,4,128,1,float16,float16,0,15.204432169596354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,fp8,0,10.038496017456055
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,float16,0,6.57746696472168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,48,8,128,1,float16,float16,0,16.498036702473957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,48,48,128,1,float16,fp8,0,5.692965189615886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,float16,0,7.1608530680338545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,48,1,128,1,float16,fp8,0,4.233248074849446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,fp8,0,4.264634768168132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,48,2,128,1,float16,float16,0,7.456677118937175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,float16,0,6.281504313151042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,48,4,128,1,float16,fp8,0,4.701312065124512
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,float16,0,5.243669191996257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,48,8,128,1,float16,fp8,0,4.673935890197754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,float16,0,3.0847574869791665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,48,48,128,1,float16,fp8,0,2.905381202697754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,float16,0,3.0690345764160156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,48,1,128,1,float16,fp8,0,2.1536693572998047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,float16,0,2.5812479654947915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,48,2,128,1,float16,fp8,0,2.1987147331237793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,fp8,0,2.323263963063558
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,48,4,128,1,float16,float16,0,4.264512062072754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,fp8,0,2.3688106536865234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,48,8,128,1,float16,float16,0,3.296346664428711
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,float16,0,1.556063969930013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,float16,0,1.2910719712575276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,48,1,128,1,float16,fp8,0,1.3338346481323242
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,48,48,128,1,float16,fp8,0,1.4857920010884602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,float16,0,1.3035840193430583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,48,2,128,1,float16,fp8,0,1.1495947043100994
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,float16,0,1.3796265920003254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,48,4,128,1,float16,fp8,0,1.351626714070638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,float16,0,1.3927359580993652
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,48,8,128,1,float16,fp8,0,1.205509344736735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,float16,0,0.827999989191691
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,float16,0,0.6978507041931152
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,48,48,128,1,float16,fp8,0,0.7913866837819418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,float16,0,0.7088479995727539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,48,1,128,1,float16,fp8,0,0.6059413353602091
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,48,2,128,1,float16,fp8,0,0.6155360142389933
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,fp8,0,0.6626559893290201
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,48,4,128,1,float16,float16,0,0.7434826691945394
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,float16,0,0.7498453458150228
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,48,8,128,1,float16,fp8,0,0.6631040175755819
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,float16,0,5.587866465250651
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,48,1,128,1,float16,fp8,0,4.944970766703288
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,fp8,0,5.0562238693237305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,48,2,128,1,float16,float16,0,10.131738662719727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,fp8,0,5.54307746887207
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,float16,0,8.106847763061523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,48,4,128,1,float16,float16,0,8.090511957804361
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,48,8,128,1,float16,fp8,0,5.573877334594727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,float16,0,3.2239678700764975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,float16,0,3.6797119776407876
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,48,1,128,1,float16,fp8,0,2.477146625518799
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,48,48,128,1,float16,fp8,0,3.6228694915771484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,fp8,0,2.5668373107910156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,48,2,128,1,float16,float16,0,3.92955748240153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,float16,0,3.203983942667643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,48,4,128,1,float16,fp8,0,2.835904121398926
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,float16,0,3.1050666173299155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,48,8,128,1,float16,fp8,0,2.770517349243164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,float16,0,1.9741172790527344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,48,48,128,1,float16,fp8,0,1.8473706245422363
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,fp8,0,1.2839466730753581
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,48,1,128,1,float16,float16,0,1.4938453038533528
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,fp8,0,1.3160053094228108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,48,2,128,1,float16,float16,0,1.5509066581726074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,fp8,0,1.537530740102132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,48,4,128,1,float16,float16,0,1.6094506581624348
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,float16,0,1.5964639981587727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,48,8,128,1,float16,fp8,0,1.7833493550618489
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,fp8,0,0.6791733105977377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,48,1,128,1,float16,float16,0,0.7846666971842448
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,fp8,0,0.9571306705474854
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,48,48,128,1,float16,float16,0,0.9863946437835693
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,float16,0,0.8049653371175131
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,48,2,128,1,float16,fp8,0,0.7423040072123209
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,float16,0,0.8473760286966959
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,48,4,128,1,float16,fp8,0,0.7698240280151367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,fp8,0,0.8030986785888672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,48,8,128,1,float16,float16,0,0.8677386442820231
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,float16,0,0.4313226540883382
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,48,1,128,1,float16,fp8,0,0.3758133252461751
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,fp8,0,0.5157653490702311
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,float16,0,0.43884265422821045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,48,2,128,1,float16,fp8,0,0.3859786589940389
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,48,48,128,1,float16,float16,0,0.5428800185521444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,float16,0,0.46750934918721515
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,48,4,128,1,float16,fp8,0,0.4203733205795288
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,fp8,0,0.4122613271077474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,48,8,128,1,float16,float16,0,0.4694240093231201
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,fp8,0,4.740810712178548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,fp8,0,4.965888023376465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,48,2,128,1,float16,float16,0,5.435312271118164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,48,1,128,1,float16,float16,0,7.150586446126302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,fp8,0,5.409562428792317
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,48,4,128,1,float16,float16,0,6.09724235534668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,float16,0,6.299930572509766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,48,8,128,1,float16,fp8,0,5.6253706614176435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,fp8,0,2.4140052795410156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,48,1,128,1,float16,float16,0,2.695295969645182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,float16,0,4.2428693771362305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,48,48,128,1,float16,fp8,0,3.905285199483236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,float16,0,2.9587891896565757
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,48,2,128,1,float16,fp8,0,3.7242453893025718
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,fp8,0,2.8601598739624023
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,48,4,128,1,float16,float16,0,3.1642611821492515
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,float16,0,3.323333422342936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,48,8,128,1,float16,fp8,0,2.859269460042318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,float16,0,1.398314634958903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,float16,0,1.9444640477498372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,48,48,128,1,float16,fp8,0,1.9754773775736492
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,48,1,128,1,float16,fp8,0,1.2249066829681396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,float16,0,1.425711949666341
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,48,2,128,1,float16,fp8,0,1.2746613025665283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,fp8,0,1.3991947174072266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,48,4,128,1,float16,float16,0,1.5950560569763184
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,float16,0,1.6840747197469075
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,48,8,128,1,float16,fp8,0,1.4215359687805176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,float16,0,0.8948799769083658
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,float16,0,1.0173973242441814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,48,48,128,1,float16,fp8,0,1.0186879634857178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,48,1,128,1,float16,fp8,0,0.6428586641947428
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,float16,0,0.7613600095113119
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,48,2,128,1,float16,fp8,0,0.6650506655375162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,fp8,0,0.766266663869222
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,48,4,128,1,float16,float16,0,0.841749350229899
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,float16,0,0.8390239874521891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,48,8,128,1,float16,fp8,0,0.7370560169219971
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,float16,0,0.40004265308380127
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,float16,0,0.5543786684672037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,48,48,128,1,float16,fp8,0,0.5366666714350382
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,48,1,128,1,float16,fp8,0,0.3511893351872762
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,float16,0,0.40989867846171063
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,48,2,128,1,float16,fp8,0,0.36635732650756836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,float16,0,0.44297067324320477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,48,4,128,1,float16,fp8,0,0.40484265486399335
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,float16,0,0.44523199399312335
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,48,8,128,1,float16,fp8,0,0.4113759994506836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,float16,0,0.22010133663813272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,48,1,128,1,float16,fp8,0,0.19606399536132812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,float16,0,0.3151093324025472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,48,48,128,1,float16,fp8,0,0.29450666904449463
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,float16,0,0.22794665892918906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,fp8,0,0.2188053329785665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,48,2,128,1,float16,fp8,0,0.20125865936279297
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,48,4,128,1,float16,float16,0,0.24672534068425497
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,fp8,0,0.21896000703175864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,48,8,128,1,float16,float16,0,0.24709866444269815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,float16,0,3.2023146947224936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,fp8,0,3.016789436340332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,48,2,128,1,float16,float16,0,3.3903306325276694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,48,1,128,1,float16,fp8,0,2.8799254099527993
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,fp8,0,3.5201279322306314
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,48,4,128,1,float16,float16,0,3.8299999237060547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,float16,0,3.7946399052937827
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,48,8,128,1,float16,fp8,0,3.436645189921061
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,float16,0,1.7465227444966633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,48,1,128,1,float16,fp8,0,1.4683359464009602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,float16,0,2.4826134045918784
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,48,48,128,1,float16,fp8,0,2.604618708292643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,float16,0,1.7328906059265137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,48,2,128,1,float16,fp8,0,1.5275893211364746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,float16,0,1.936581293741862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,48,4,128,1,float16,fp8,0,1.7321972846984863
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,float16,0,2.013253370920817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,48,8,128,1,float16,fp8,0,1.7706507047017415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,float16,0,0.9094879627227783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,48,1,128,1,float16,fp8,0,0.7592159907023112
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,float16,0,1.277669350306193
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,48,48,128,1,float16,fp8,0,1.3262293338775635
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,float16,0,0.8958613077799479
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,48,2,128,1,float16,fp8,0,0.7874933083852133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,float16,0,0.9992533524831136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,48,4,128,1,float16,fp8,0,0.9217546780904134
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,float16,0,0.9953493277231852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,48,8,128,1,float16,fp8,0,0.902458667755127
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,float16,0,0.4626186688741048
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,float16,0,0.681658665339152
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,48,1,128,1,float16,fp8,0,0.4038826624552409
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,48,48,128,1,float16,fp8,0,0.6861973603566488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,float16,0,0.47383467356363934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,48,2,128,1,float16,fp8,0,0.42472533384958905
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,float16,0,0.5365546544392904
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,48,4,128,1,float16,fp8,0,0.4824320077896118
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,float16,0,0.5354079802831014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,48,8,128,1,float16,fp8,0,0.48369065920511883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,float16,0,0.25257599353790283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,48,1,128,1,float16,fp8,0,0.2225546638170878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,float16,0,0.2606773376464844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,fp8,0,0.36587198575337726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,48,48,128,1,float16,float16,0,0.38060800234476727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,48,2,128,1,float16,fp8,0,0.23117866118748984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,float16,0,0.28813334306081134
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,48,4,128,1,float16,fp8,0,0.2649173339207967
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,fp8,0,0.26946133375167847
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,48,8,128,1,float16,float16,0,0.2902933359146118
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,float16,0,0.13991999626159668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,48,1,128,1,float16,fp8,0,0.1280586620171865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,float16,0,0.14334400494893393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,float16,0,0.2153600056966146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,48,2,128,1,float16,fp8,0,0.13106667002042136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,48,48,128,1,float16,fp8,0,0.20337599515914917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,float16,0,0.1527786652247111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,48,4,128,1,float16,fp8,0,0.14772799611091614
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,float16,0,0.155648003021876
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,48,8,128,1,float16,fp8,0,0.14454399545987448
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,float16,0,3.3920586903889975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,48,1,128,1,float16,fp8,0,2.898442586263021
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,fp8,0,3.0988693237304688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,48,2,128,1,float16,float16,0,3.314298629760742
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,float16,0,4.023258527119954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,48,4,128,1,float16,fp8,0,3.8289868036905923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,float16,0,3.9891093571980796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,48,8,128,1,float16,fp8,0,3.669621467590332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,float16,0,1.6976799964904785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,48,1,128,1,float16,fp8,0,1.4764480590820312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,float16,0,2.7578293482462564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,48,48,128,1,float16,fp8,0,2.9783573150634766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,float16,0,1.7139147122701008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,48,2,128,1,float16,fp8,0,1.56113068262736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,float16,0,2.0449387232462564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,48,4,128,1,float16,fp8,0,1.815264066060384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,float16,0,2.045135974884033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,48,8,128,1,float16,fp8,0,1.9453226725260417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,float16,0,0.8541706403096517
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,float16,0,1.4082612991333008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,48,48,128,1,float16,fp8,0,1.5094985961914062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,48,1,128,1,float16,fp8,0,0.762826681137085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,float16,0,0.8901653289794922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,float16,0,1.0435360272725422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,48,2,128,1,float16,fp8,0,0.7987253665924072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,48,4,128,1,float16,fp8,0,0.9402026335398356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,float16,0,1.0162506898244221
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,48,8,128,1,float16,fp8,0,0.950656016667684
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,float16,0,0.4584426482518514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,48,1,128,1,float16,fp8,0,0.4017973343531291
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,float16,0,0.7301066716512045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,48,48,128,1,float16,fp8,0,0.7772053082784017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,float16,0,0.47815465927124023
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,48,2,128,1,float16,fp8,0,0.42483198642730713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,float16,0,0.5585866769154867
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,48,4,128,1,float16,fp8,0,0.5266506671905518
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,float16,0,0.5389493306477865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,48,8,128,1,float16,fp8,0,0.4981280167897542
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,float16,0,0.24637333552042642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,48,1,128,1,float16,fp8,0,0.21945599714914957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,float16,0,0.4085226853688558
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,48,48,128,1,float16,fp8,0,0.40747201442718506
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,float16,0,0.2582133412361145
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,48,2,128,1,float16,fp8,0,0.2350239952405294
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,fp8,0,0.2756693363189697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,48,4,128,1,float16,float16,0,0.2985066572825114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,fp8,0,0.2746986746788025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,48,8,128,1,float16,float16,0,0.2954453428586324
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,float16,0,0.13361600041389465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,48,1,128,1,float16,fp8,0,0.12100266416867574
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,float16,0,0.13740266362826029
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,48,2,128,1,float16,fp8,0,0.12415466705958049
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,float16,0,0.15587733189264932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,fp8,0,0.22101867198944092
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,48,48,128,1,float16,float16,0,0.23067200183868408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,48,4,128,1,float16,fp8,0,0.1423893372217814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,fp8,0,0.14403200149536133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,48,8,128,1,float16,float16,0,0.1612160007158915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,float16,0,0.08131733536720276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,48,1,128,1,float16,fp8,0,0.07292266686757405
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,float16,0,0.08152000109354655
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,fp8,0,0.12157866358757019
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,48,48,128,1,float16,float16,0,0.12946133812268576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,48,2,128,1,float16,fp8,0,0.07441600163777669
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,float16,0,0.08701333403587341
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,float16,0,0.08804266651471455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,48,4,128,1,float16,fp8,0,0.08402666449546814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,48,8,128,1,float16,fp8,0,0.08430400490760803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,float16,0,2.0262667338053384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,48,1,128,1,float16,fp8,0,1.8628533681233723
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,float16,0,2.1590879758199057
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,48,2,128,1,float16,fp8,0,1.9706719716389973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,float16,0,2.5874880154927573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,48,4,128,1,float16,fp8,0,2.3983519872029624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,float16,0,2.5623839696248374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,48,8,128,1,float16,fp8,0,2.421290715535482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,float16,0,1.0663680235544841
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,48,1,128,1,float16,fp8,0,0.9474026362101237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,float16,0,1.8872639338175456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,48,48,128,1,float16,fp8,0,2.0781973203023276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,float16,0,1.115610678990682
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,48,2,128,1,float16,fp8,0,1.0078667004903157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,float16,0,1.3142773310343425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,48,4,128,1,float16,fp8,0,1.2175947030385335
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,fp8,0,1.2331360181172688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,48,8,128,1,float16,float16,0,1.3532586097717285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,float16,0,0.547274669011434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,48,1,128,1,float16,fp8,0,0.4890400171279907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,fp8,0,1.0591839949289958
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,48,48,128,1,float16,float16,0,0.9681066672007242
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,float16,0,0.5819253524144491
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,48,2,128,1,float16,fp8,0,0.5230186780293783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,float16,0,0.6987306276957194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,48,4,128,1,float16,fp8,0,0.656768004099528
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,fp8,0,0.6428533395131429
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,48,8,128,1,float16,float16,0,0.6860640048980713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,float16,0,0.29530133803685504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,48,1,128,1,float16,fp8,0,0.2627200086911519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,float16,0,0.5225226481755575
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,48,48,128,1,float16,fp8,0,0.5469866593678793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,float16,0,0.31199467182159424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,48,2,128,1,float16,fp8,0,0.2800053358078003
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,float16,0,0.3651039997736613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,48,4,128,1,float16,fp8,0,0.3532373507817586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,fp8,0,0.34442134698232013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,48,8,128,1,float16,float16,0,0.37010665734608966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,float16,0,0.15934933225313822
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,48,1,128,1,float16,fp8,0,0.1435466706752777
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,float16,0,0.1682186722755432
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,48,2,128,1,float16,fp8,0,0.15331199765205383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,fp8,0,0.28761066993077594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,48,48,128,1,float16,float16,0,0.29737599690755206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,float16,0,0.19779733816782633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,48,4,128,1,float16,fp8,0,0.1864373286565145
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,float16,0,0.2023199995358785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,48,8,128,1,float16,fp8,0,0.19894399245580038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,float16,0,0.08933333555857341
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,48,1,128,1,float16,fp8,0,0.08318933347860973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,float16,0,0.16236799955368042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,48,48,128,1,float16,fp8,0,0.15922133127848306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,float16,0,0.09187733133633931
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,48,2,128,1,float16,fp8,0,0.08462400237719218
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,float16,0,0.10069866975148518
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,48,4,128,1,float16,fp8,0,0.09788266817728679
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,float16,0,0.10424000024795532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,48,8,128,1,float16,fp8,0,0.09779733419418335
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,float16,0,0.056799997886021934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,float16,0,0.09152000149091084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,48,1,128,1,float16,fp8,0,0.0528053343296051
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,48,48,128,1,float16,fp8,0,0.08116266628106435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,fp8,0,0.054378668467203774
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,48,2,128,1,float16,float16,0,0.058335999647776283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,float16,0,0.060453335444132485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,48,4,128,1,float16,fp8,0,0.06014933188756307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,fp8,0,0.06062399844328562
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,48,8,128,1,float16,float16,0,0.061493332187334694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,float16,0,2.19429874420166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,48,1,128,1,float16,fp8,0,1.9949866930643718
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,float16,0,2.2934826215108237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,float16,0,2.934272130330404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,48,4,128,1,float16,fp8,0,2.636042594909668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,float16,0,2.8710079193115234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,48,2,128,1,float16,fp8,0,2.175274689992269
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,float16,0,1.1185920238494873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,fp8,0,2.521770636240641
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,48,1,128,1,float16,fp8,0,1.0226240158081055
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,48,8,128,1,float16,fp8,0,2.784010569254557
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,48,48,128,1,float16,float16,0,2.2319092750549316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,fp8,0,1.1006666819254558
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,48,2,128,1,float16,float16,0,1.175653298695882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,float16,0,1.5179626146952312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,48,4,128,1,float16,fp8,0,1.4662826855977376
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,float16,0,1.522666613260905
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,48,8,128,1,float16,fp8,0,1.486394723256429
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,float16,0,0.5883413155873617
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,float16,0,1.146351973215739
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,48,1,128,1,float16,fp8,0,0.5235626697540283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,48,48,128,1,float16,fp8,0,1.2760213216145833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,float16,0,0.622650663057963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,48,2,128,1,float16,fp8,0,0.5710293451944987
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,float16,0,0.7587839762369791
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,48,4,128,1,float16,fp8,0,0.7452159722646078
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,float16,0,0.7895999749501547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,48,8,128,1,float16,fp8,0,0.7654720147450765
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,float16,0,0.3055306673049927
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,48,1,128,1,float16,fp8,0,0.2796586751937866
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,float16,0,0.5869439840316772
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,48,48,128,1,float16,fp8,0,0.654149333635966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,float16,0,0.3338666756947835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,48,2,128,1,float16,fp8,0,0.3012853264808655
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,float16,0,0.4196266730626424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,48,4,128,1,float16,fp8,0,0.40241066614786786
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,fp8,0,0.3720906575520833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,48,8,128,1,float16,float16,0,0.39075199762980145
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,float16,0,0.16552000244458517
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,48,1,128,1,float16,fp8,0,0.15108266472816467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,float16,0,0.1799573302268982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,48,2,128,1,float16,fp8,0,0.1653493344783783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,fp8,0,0.33954131603240967
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,48,48,128,1,float16,float16,0,0.30649600426356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,float16,0,0.21634133656819662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,48,4,128,1,float16,fp8,0,0.21195199092229208
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,float16,0,0.21940799554189047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,48,8,128,1,float16,fp8,0,0.2148746649424235
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,float16,0,0.09244267145792644
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,48,1,128,1,float16,fp8,0,0.08261866867542267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,float16,0,0.09642666578292847
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,48,2,128,1,float16,fp8,0,0.08725333213806152
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,float16,0,0.16529066363970438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,48,48,128,1,float16,fp8,0,0.18252799908320108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,float16,0,0.11425066987673442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,48,4,128,1,float16,fp8,0,0.10335999727249146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,float16,0,0.11706667145093282
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,48,8,128,1,float16,fp8,0,0.10770666599273682
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,float16,0,0.05486933390299479
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,48,1,128,1,float16,fp8,0,0.05237866441408793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,float16,0,0.05605866511662801
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,48,2,128,1,float16,fp8,0,0.05460800230503082
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,fp8,0,0.09936533371607463
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,48,48,128,1,float16,float16,0,0.09246933460235596
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,float16,0,0.061466669042905174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,48,4,128,1,float16,fp8,0,0.06203199923038483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,float16,0,0.06276266773541768
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,48,8,128,1,float16,fp8,0,0.060826669136683144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,float16,0,0.037989333271980286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,48,1,128,1,float16,fp8,0,0.03555733213822047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,float16,0,0.03937066594759623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,48,2,128,1,float16,fp8,0,0.037834666669368744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,float16,0,0.04161600023508072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,fp8,0,0.055311997731526695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,48,48,128,1,float16,float16,0,0.049813335140546165
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,48,4,128,1,float16,fp8,0,0.04167999823888143
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,fp8,0,0.04197866717974345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,48,8,128,1,float16,float16,0,0.04211199780305227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,float16,0,1.678533395131429
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,48,1,128,1,float16,fp8,0,1.5321653683980305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,float16,0,1.8168212572733562
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,48,2,128,1,float16,fp8,0,1.7023305892944336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,float16,0,2.3843092918395996
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,48,4,128,1,float16,fp8,0,2.1583946545918784
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,float16,0,2.386250654856364
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,48,8,128,1,float16,fp8,0,2.257871945699056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,float16,0,1.95962127049764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,float16,0,0.861840009689331
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,48,48,128,1,float16,fp8,0,2.285205364227295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,48,1,128,1,float16,fp8,0,0.7807306448618571
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,float16,0,0.9205333391825358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,48,2,128,1,float16,fp8,0,0.86626664797465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,float16,0,1.2256213029225667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,48,4,128,1,float16,fp8,0,1.2539520263671875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,float16,0,1.1767306327819824
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,48,8,128,1,float16,fp8,0,1.2103892962137859
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,float16,0,1.0027626355489094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,float16,0,0.44312000274658203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,float16,0,0.4951999982198079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,48,1,128,1,float16,fp8,0,0.40490134557088214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,48,48,128,1,float16,fp8,0,1.1541600227355957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,48,2,128,1,float16,fp8,0,0.4469919999440511
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,fp8,0,0.5860160191853842
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,48,4,128,1,float16,float16,0,0.6265759865442911
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,float16,0,0.6388533512751261
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,48,8,128,1,float16,fp8,0,0.5965280135472616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,float16,0,0.23889066775639853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,48,1,128,1,float16,fp8,0,0.21241066853205362
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,float16,0,0.5141813357671102
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,48,48,128,1,float16,fp8,0,0.5904320081075033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,float16,0,0.25809599955876666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,48,2,128,1,float16,fp8,0,0.23480532566706339
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,float16,0,0.3412160078684489
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,48,4,128,1,float16,fp8,0,0.33879466851552326
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,float16,0,0.3373386859893799
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,48,8,128,1,float16,fp8,0,0.3110400040944417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,float16,0,0.12865599989891052
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,48,1,128,1,float16,fp8,0,0.1178986628850301
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,float16,0,0.26828799645106
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,48,48,128,1,float16,fp8,0,0.3063093423843384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,float16,0,0.1395093301932017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,48,2,128,1,float16,fp8,0,0.1307413379351298
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,float16,0,0.17973333597183228
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,48,4,128,1,float16,fp8,0,0.1791306734085083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,float16,0,0.18188265959421793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,48,8,128,1,float16,fp8,0,0.1798293391863505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,float16,0,0.07122133175532024
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,48,1,128,1,float16,fp8,0,0.06294933458169301
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,float16,0,0.07442666590213776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,48,2,128,1,float16,fp8,0,0.06834666430950165
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,float16,0,0.14512000481287637
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,48,48,128,1,float16,fp8,0,0.1622933348019918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,float16,0,0.09265066186587016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,48,4,128,1,float16,fp8,0,0.08396800359090169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,float16,0,0.09949866930643718
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,48,8,128,1,float16,fp8,0,0.08979733784993489
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,fp8,0,0.040576001008351646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,48,1,128,1,float16,float16,0,0.04266666869322459
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,fp8,0,0.043807998299598694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,48,2,128,1,float16,float16,0,0.04364799956480662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,float16,0,0.08021333316961925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,48,48,128,1,float16,fp8,0,0.0881119966506958
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,float16,0,0.04965866605440775
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,48,4,128,1,float16,fp8,0,0.049685334165891014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,fp8,0,0.05154666801293691
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,48,8,128,1,float16,float16,0,0.050111999114354454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,float16,0,0.02972800036271413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,48,1,128,1,float16,fp8,0,0.027349332968393963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,float16,0,0.030229332546393078
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,48,2,128,1,float16,fp8,0,0.02943466603755951
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,float16,0,0.033146666983763375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,fp8,0,0.047135998805363975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,48,48,128,1,float16,float16,0,0.041850666205088295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,48,4,128,1,float16,fp8,0,0.03348266581694285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,float16,0,0.033615998923778534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,48,8,128,1,float16,fp8,0,0.03350933392842611
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,float16,0,0.021125334004561108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,48,1,128,1,float16,fp8,0,0.021114667256673176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,float16,0,0.021066665649414062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,48,2,128,1,float16,fp8,0,0.02102400114138921
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,float16,0,0.025578667720158894
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,float16,0,0.021151999632517498
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,48,48,128,1,float16,fp8,0,0.029343999922275543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,48,4,128,1,float16,fp8,0,0.023130667706330616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,float16,0,0.021488000949223835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,48,8,128,1,float16,fp8,0,0.023082666099071503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,float16,0,0.6928479671478271
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,float16,0,0.7590880393981934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,48,1,128,1,float16,fp8,0,0.6551093260447184
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,48,2,128,1,float16,fp8,0,0.7266879876454672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,float16,0,1.0994933446248372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,48,4,128,1,float16,fp8,0,1.1111839612325032
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,float16,0,1.1020906766255696
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,48,8,128,1,float16,fp8,0,1.099455992380778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,float16,0,0.9212480386098226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,fp8,0,0.3349333206812541
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,48,1,128,1,float16,float16,0,0.35864531993865967
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,48,48,128,1,float16,fp8,0,1.0881973107655842
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,fp8,0,0.3811306556065877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,48,2,128,1,float16,float16,0,0.4091573158899943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,float16,0,0.5431573390960693
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,48,4,128,1,float16,fp8,0,0.5194826523462931
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,float16,0,0.5559893449147543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,48,8,128,1,float16,fp8,0,0.5525493224461874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,float16,0,0.46953598658243817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,float16,0,0.19801066319147745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,48,1,128,1,float16,fp8,0,0.1811359922091166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,48,48,128,1,float16,fp8,0,0.5558079878489176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,float16,0,0.2153493364651998
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,48,2,128,1,float16,fp8,0,0.20212799310684204
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,float16,0,0.29595200220743817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,48,4,128,1,float16,fp8,0,0.30364267031351727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,float16,0,0.29201600948969525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,48,8,128,1,float16,fp8,0,0.27563732862472534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,float16,0,0.10876267155011494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,48,1,128,1,float16,fp8,0,0.10035733381907146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,float16,0,0.24623999993006387
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,48,48,128,1,float16,fp8,0,0.28777599334716797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,fp8,0,0.11089066664377849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,48,2,128,1,float16,float16,0,0.11629333098729451
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,float16,0,0.16313067078590393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,48,4,128,1,float16,fp8,0,0.1545973320802053
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,float16,0,0.15575466553370157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,48,8,128,1,float16,fp8,0,0.1502133309841156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,float16,0,0.05942933261394501
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,float16,0,0.06418133278687795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,48,1,128,1,float16,fp8,0,0.056218668818473816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,fp8,0,0.153221329053243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,48,48,128,1,float16,float16,0,0.13395733634630838
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,48,2,128,1,float16,fp8,0,0.060138667623202004
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,float16,0,0.0825386643409729
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,48,4,128,1,float16,fp8,0,0.07658666869004567
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,float16,0,0.08724799752235413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,48,8,128,1,float16,fp8,0,0.07853866616884868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,fp8,0,0.033626665671666466
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,48,1,128,1,float16,float16,0,0.03540800015131632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,float16,0,0.03555733213822047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,48,2,128,1,float16,fp8,0,0.035717333356539406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,fp8,0,0.08045866588751475
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,48,48,128,1,float16,float16,0,0.07231999933719635
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,float16,0,0.04194133480389913
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,48,4,128,1,float16,fp8,0,0.043749332427978516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,float16,0,0.04167999823888143
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,48,8,128,1,float16,fp8,0,0.04378666480382284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,fp8,0,0.025205334027608235
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,48,1,128,1,float16,float16,0,0.025125332176685333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,float16,0,0.027189334233601887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,48,2,128,1,float16,fp8,0,0.025258667767047882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,fp8,0,0.043840001026789345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,48,48,128,1,float16,float16,0,0.036901332437992096
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,float16,0,0.029317334294319153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,48,4,128,1,float16,fp8,0,0.029834667841593426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,float16,0,0.029290666182835896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,48,8,128,1,float16,fp8,0,0.029733332494894665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,float16,0,0.017386666188637417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,48,1,128,1,float16,fp8,0,0.017445333302021027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,float16,0,0.017386666188637417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,float16,0,0.02332266668478648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,48,2,128,1,float16,fp8,0,0.01699200024207433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,48,48,128,1,float16,fp8,0,0.027301333844661713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,float16,0,0.019434666881958645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,48,4,128,1,float16,fp8,0,0.019424000134070713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,float16,0,0.01924266666173935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,48,8,128,1,float16,fp8,0,0.019130667050679524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,float16,0,0.017093333105246227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,float16,0,0.017173333714405697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,48,1,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,fp8,0,0.021381333470344543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,48,2,128,1,float16,fp8,0,0.016970666746298473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,48,48,128,1,float16,float16,0,0.01937599976857503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,float16,0,0.01701333373785019
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,48,4,128,1,float16,fp8,0,0.017423999806245167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,float16,0,0.017221332838137943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,48,8,128,1,float16,fp8,0,0.017130666722853977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,float16,0,0.4200906753540039
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,48,1,128,1,float16,fp8,0,0.3909813165664673
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,float16,0,0.46302398045857746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,48,2,128,1,float16,fp8,0,0.44228800137837726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,float16,0,0.6020266612370809
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,48,4,128,1,float16,fp8,0,0.5661173264185587
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,float16,0,0.61407999197642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,48,8,128,1,float16,fp8,0,0.6246506770451864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,float16,0,0.22325867414474487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,fp8,0,0.580623984336853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,48,1,128,1,float16,fp8,0,0.20434133211771646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,48,48,128,1,float16,float16,0,0.4975573221842448
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,float16,0,0.24103999137878418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,48,2,128,1,float16,fp8,0,0.23138133684794107
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,float16,0,0.3249173363049825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,48,4,128,1,float16,fp8,0,0.3327733278274536
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,float16,0,0.3035306731859843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,48,8,128,1,float16,fp8,0,0.30134399731953937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,float16,0,0.11982400218645732
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,float16,0,0.25784534215927124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,48,48,128,1,float16,fp8,0,0.29809067646662396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,48,1,128,1,float16,fp8,0,0.11310933033625285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,float16,0,0.12778133153915405
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,48,2,128,1,float16,fp8,0,0.12573333581288657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,float16,0,0.16714666287104288
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,48,4,128,1,float16,fp8,0,0.1643946667512258
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,float16,0,0.16849599281946817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,48,8,128,1,float16,fp8,0,0.1740586757659912
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,float16,0,0.0652106652657191
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,float16,0,0.13635200262069702
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,48,48,128,1,float16,fp8,0,0.15848533312479654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,48,1,128,1,float16,fp8,0,0.060218666990598045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,float16,0,0.07283199826876323
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,float16,0,0.09019200007120769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,48,2,128,1,float16,fp8,0,0.06621333460013072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,48,4,128,1,float16,fp8,0,0.08046933511892955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,float16,0,0.0892639954884847
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,48,8,128,1,float16,fp8,0,0.0830026666323344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,float16,0,0.039488000174363456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,48,1,128,1,float16,fp8,0,0.03749866783618927
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,fp8,0,0.08462400237719218
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,float16,0,0.03986666599909464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,48,48,128,1,float16,float16,0,0.07486400008201599
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,48,2,128,1,float16,fp8,0,0.037802666425704956
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,float16,0,0.04596266647179922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,48,4,128,1,float16,fp8,0,0.04577066500981649
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,float16,0,0.04585599899291992
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,48,8,128,1,float16,fp8,0,0.04785066843032837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,float16,0,0.02380799998839696
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,48,1,128,1,float16,fp8,0,0.023205332458019257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,float16,0,0.03575466573238373
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,float16,0,0.025519999365011852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,48,48,128,1,float16,fp8,0,0.041663999358812966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,fp8,0,0.03142933299144109
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,48,2,128,1,float16,fp8,0,0.025536000728607178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,48,4,128,1,float16,float16,0,0.028117333849271137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,float16,0,0.027248000105222065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,48,8,128,1,float16,fp8,0,0.029194665451844532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,float16,0,0.01907733331123988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,48,1,128,1,float16,fp8,0,0.01934933289885521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,float16,0,0.019098666807015736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,48,2,128,1,float16,fp8,0,0.018778666853904724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,float16,0,0.02077866718173027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,float16,0,0.02350933353106181
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,48,48,128,1,float16,fp8,0,0.02940800040960312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,48,4,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,float16,0,0.020949333906173706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,48,8,128,1,float16,fp8,0,0.021189334491888683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,float16,0,0.015066667149464289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,float16,0,0.015013333410024643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,48,1,128,1,float16,fp8,0,0.013338666409254074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,fp8,0,0.019109333554903667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,48,2,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,float16,0,0.01314666618903478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,48,48,128,1,float16,float16,0,0.017312000195185345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,48,4,128,1,float16,fp8,0,0.014912000546852747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,float16,0,0.014357333381970724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,48,8,128,1,float16,fp8,0,0.013536000003417334
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,float16,0,0.012938667088747025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,48,1,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,float16,0,0.013104000439246496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,48,2,128,1,float16,fp8,0,0.013194666554530462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,float16,0,0.013221333424250284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,fp8,0,0.016869333883126576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,48,48,128,1,float16,float16,0,0.015146666516860327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,48,4,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,48,8,128,1,float16,fp8,0,0.013872000078360239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,fp8,0,0.3036373257637024
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,float16,0,0.36073601245880127
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,48,1,128,1,float16,float16,0,0.3425439993540446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,48,2,128,1,float16,fp8,0,0.32368000348409015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,float16,0,0.4500693480173747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,48,4,128,1,float16,fp8,0,0.40677865346272785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,fp8,0,0.3999679883321126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,48,8,128,1,float16,float16,0,0.42205333709716797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,float16,0,0.1805760065714518
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,48,1,128,1,float16,fp8,0,0.16172266999880472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,fp8,0,0.3465546766916911
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,48,48,128,1,float16,float16,0,0.34656532605489093
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,float16,0,0.18919465939203897
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,48,2,128,1,float16,fp8,0,0.16939733425776163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,float16,0,0.23270400365193686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,float16,0,0.22572267055511475
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,48,8,128,1,float16,fp8,0,0.223471999168396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,48,4,128,1,float16,fp8,0,0.21308799584706625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,fp8,0,0.082805335521698
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,48,1,128,1,float16,float16,0,0.09595200419425964
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,fp8,0,0.181386669476827
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,48,48,128,1,float16,float16,0,0.18214933077494302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,float16,0,0.10149332880973816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,48,2,128,1,float16,fp8,0,0.08912000060081482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,fp8,0,0.11345066626866658
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,48,8,128,1,float16,float16,0,0.11914133032162984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,fp8,0,0.10752532879511516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,48,4,128,1,float16,float16,0,0.11769066254297893
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,float16,0,0.05269866685072581
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,48,1,128,1,float16,fp8,0,0.04781333108743032
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,float16,0,0.05385066568851471
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,48,2,128,1,float16,fp8,0,0.04974933465321859
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,float16,0,0.09684800108273824
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,48,48,128,1,float16,fp8,0,0.09964266419410706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,fp8,0,0.058149332801500954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,48,4,128,1,float16,float16,0,0.05925333499908447
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,float16,0,0.06015466650327047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,48,8,128,1,float16,fp8,0,0.05782933533191681
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,float16,0,0.031701333820819855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,48,1,128,1,float16,fp8,0,0.029418667157491047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,fp8,0,0.04865066707134247
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,48,48,128,1,float16,float16,0,0.04365866879622141
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,float16,0,0.033285332222779594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,48,2,128,1,float16,fp8,0,0.031557333966096245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,fp8,0,0.03542399903138479
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,48,4,128,1,float16,float16,0,0.03547733277082443
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,float16,0,0.03579733272393545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,48,8,128,1,float16,fp8,0,0.035642666121323906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,float16,0,0.021402666966120403
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,fp8,0,0.030741333961486816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,48,1,128,1,float16,fp8,0,0.021013334393501282
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,48,48,128,1,float16,float16,0,0.02532800038655599
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,float16,0,0.021359999974568684
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,48,2,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,fp8,0,0.022170667846997578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,48,4,128,1,float16,float16,0,0.023344000180562336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,float16,0,0.023333333432674408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,48,8,128,1,float16,fp8,0,0.023157333334287006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,float16,0,0.01685333376129468
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,48,1,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,float16,0,0.01916266605257988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,48,48,128,1,float16,fp8,0,0.02126399924357732
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,float16,0,0.0169813334941864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,float16,0,0.017024000485738117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,48,2,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,48,4,128,1,float16,fp8,0,0.01716800034046173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,float16,0,0.017114666601022083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,48,8,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,48,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,float16,0,0.015295999745527903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,float16,0,0.01341333364446958
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,48,48,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,48,2,128,1,float16,fp8,0,0.013162666310866674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,float16,0,0.013082666943470636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,float16,0,0.013343999783198038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,48,4,128,1,float16,fp8,0,0.013477332890033722
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,48,8,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,float16,0,0.01322666679819425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,48,1,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,48,2,128,1,float16,float16,0,0.013290667285521826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,float16,0,0.014831999937693277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,48,48,128,1,float16,fp8,0,0.01524266724785169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,48,4,128,1,float16,fp8,0,0.013290667285521826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,48,8,128,1,float16,float16,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,fp8,0,0.2651253342628479
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,48,1,128,1,float16,float16,0,0.31618134180704754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,fp8,0,0.27722134192784625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,48,2,128,1,float16,float16,0,0.32145599524180096
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,float16,0,0.36353600025177
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,48,4,128,1,float16,fp8,0,0.3173866669336955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,float16,0,0.36407466729482013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,48,8,128,1,float16,fp8,0,0.3269066611925761
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,float16,0,0.16544000307718912
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,48,1,128,1,float16,fp8,0,0.13538666566212973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,float16,0,0.24945066372553507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,48,48,128,1,float16,fp8,0,0.23117866118748984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,float16,0,0.1725333333015442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,48,2,128,1,float16,fp8,0,0.14020267128944397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,float16,0,0.19193067153294882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,48,4,128,1,float16,fp8,0,0.15633066495259604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,float16,0,0.18787733713785806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,48,8,128,1,float16,fp8,0,0.16306133071581522
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,float16,0,0.08692266543706258
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,48,1,128,1,float16,fp8,0,0.07237333556016286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,float16,0,0.13099732995033264
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,48,48,128,1,float16,fp8,0,0.12427199880282085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,float16,0,0.08920533458391826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,48,2,128,1,float16,fp8,0,0.07448533177375793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,float16,0,0.09416533509890239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,48,4,128,1,float16,fp8,0,0.08313066760698955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,float16,0,0.09513599673906963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,48,8,128,1,float16,fp8,0,0.0830026666323344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,float16,0,0.048570667703946434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,48,1,128,1,float16,fp8,0,0.041722665230433144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,float16,0,0.07098666826883952
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,48,48,128,1,float16,fp8,0,0.06223999957243601
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,float16,0,0.049925332268079124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,48,2,128,1,float16,fp8,0,0.043322667479515076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,float16,0,0.05176533261934916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,48,4,128,1,float16,fp8,0,0.04775466521581014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,float16,0,0.052042668064435325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,48,8,128,1,float16,fp8,0,0.04773866633574168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,float16,0,0.029631999631722767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,48,1,128,1,float16,fp8,0,0.027274665733178455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,float16,0,0.029450667401154835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,48,2,128,1,float16,fp8,0,0.027327999472618103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,float16,0,0.03339199970165888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,48,48,128,1,float16,fp8,0,0.03750933210055033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,float16,0,0.03140799949566523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,48,4,128,1,float16,fp8,0,0.029391999046007793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,fp8,0,0.02951466788848241
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,48,8,128,1,float16,float16,0,0.029824001093705494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,float16,0,0.01940800001223882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,float16,0,0.021253332495689392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,48,1,128,1,float16,fp8,0,0.019167999426523846
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,48,48,128,1,float16,fp8,0,0.023077333966890972
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,float16,0,0.01942933350801468
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,float16,0,0.019754666835069656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,48,2,128,1,float16,fp8,0,0.01930133377512296
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,48,4,128,1,float16,fp8,0,0.019029332945744198
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,fp8,0,0.019237333287795384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,48,8,128,1,float16,float16,0,0.019493332753578823
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,float16,0,0.015061333775520325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,float16,0,0.01732800031701724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,48,1,128,1,float16,fp8,0,0.01534933348496755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,48,48,128,1,float16,fp8,0,0.019088000059127808
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,fp8,0,0.01516266663869222
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,float16,0,0.016373333831628162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,48,2,128,1,float16,float16,0,0.016095999628305435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,48,4,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,float16,0,0.016837333639462788
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,48,8,128,1,float16,fp8,0,0.016271999726692837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,float16,0,0.013258667041858038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,48,1,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,fp8,0,0.015226667126019796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,float16,0,0.013221333424250284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,48,48,128,1,float16,float16,0,0.013290667285521826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,48,2,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,48,4,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,48,8,128,1,float16,fp8,0,0.013194666554530462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,float16,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,48,1,128,1,float16,fp8,0,0.01259200026591619
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,float16,0,0.013274667163689932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,float16,0,0.012981332838535309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,48,48,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,48,2,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,float16,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,48,4,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,float16,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,48,8,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,fp8,0,0.23916800816853842
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,48,1,128,1,float16,float16,0,0.3059893250465393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,float16,0,0.3148159980773926
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,48,2,128,1,float16,fp8,0,0.2454986572265625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,float16,0,0.3311199943224589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,48,4,128,1,float16,fp8,0,0.26846933364868164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,fp8,0,0.270906666914622
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,48,8,128,1,float16,float16,0,0.3292959928512573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,float16,0,0.15494400262832642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,48,1,128,1,float16,fp8,0,0.12567999958992004
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,float16,0,0.1999786694844564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,48,48,128,1,float16,fp8,0,0.1750026742617289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,fp8,0,0.12758933504422507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,48,2,128,1,float16,float16,0,0.15759467085202536
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,float16,0,0.1655786633491516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,48,4,128,1,float16,fp8,0,0.1344213287035624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,float16,0,0.17128000656763712
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,48,8,128,1,float16,fp8,0,0.13435199856758118
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,float16,0,0.1093280017375946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,float16,0,0.08535466591517131
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,48,1,128,1,float16,fp8,0,0.0666133314371109
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,48,48,128,1,float16,fp8,0,0.0941386620203654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,fp8,0,0.0697813332080841
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,float16,0,0.0867733359336853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,48,2,128,1,float16,float16,0,0.0849173367023468
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,48,4,128,1,float16,fp8,0,0.07266133526961009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,float16,0,0.0883733332157135
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,48,8,128,1,float16,fp8,0,0.07264000177383423
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,float16,0,0.04795200129350027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,float16,0,0.0531626691420873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,48,1,128,1,float16,fp8,0,0.03985599925120672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,48,48,128,1,float16,fp8,0,0.050154666105906166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,float16,0,0.04800533254941305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,float16,0,0.047781333327293396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,48,2,128,1,float16,fp8,0,0.039647998909155525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,48,4,128,1,float16,fp8,0,0.04195733368396759
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,float16,0,0.04964800179004669
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,48,8,128,1,float16,fp8,0,0.041802664597829185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,float16,0,0.02923733244339625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,float16,0,0.029215998947620392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,48,1,128,1,float16,fp8,0,0.02536533276240031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,fp8,0,0.029391999046007793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,48,48,128,1,float16,float16,0,0.02922666569550832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,48,2,128,1,float16,fp8,0,0.025418666501839954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,float16,0,0.028901333610216778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,float16,0,0.02752000093460083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,48,4,128,1,float16,fp8,0,0.025445332129796345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,48,8,128,1,float16,fp8,0,0.026474667092164356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,float16,0,0.019333332777023315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,48,1,128,1,float16,fp8,0,0.01918399954835574
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,float16,0,0.019152000546455383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,48,2,128,1,float16,fp8,0,0.018842666099468868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,float16,0,0.02109866589307785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,48,48,128,1,float16,fp8,0,0.021295999487241108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,float16,0,0.01905599981546402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,48,4,128,1,float16,fp8,0,0.01877333347996076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,float16,0,0.019733333339293797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,48,8,128,1,float16,fp8,0,0.018965333700180054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,float16,0,0.015034666905800501
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,48,1,128,1,float16,fp8,0,0.01544533297419548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,fp8,0,0.018805333723624546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,48,48,128,1,float16,float16,0,0.017162666966517765
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,float16,0,0.014906667172908783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,48,2,128,1,float16,fp8,0,0.014848000059525171
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,float16,0,0.015290666371583939
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,fp8,0,0.015530666957298914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,48,8,128,1,float16,float16,0,0.015002666662136713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,48,4,128,1,float16,fp8,0,0.015360000232855478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,float16,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,48,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,float16,0,0.012639999389648438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,float16,0,0.014848000059525171
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,48,48,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,48,2,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,float16,0,0.012960000584522883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,48,4,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,float16,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,48,8,128,1,float16,fp8,0,0.013183999806642532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,float16,0,0.01137599969903628
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,48,1,128,1,float16,fp8,0,0.012479999413092932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,float16,0,0.012154666086037954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,float16,0,0.013104000439246496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,48,48,128,1,float16,fp8,0,0.01525866612792015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,48,2,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,float16,0,0.012885333349307379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,48,4,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,float16,0,0.012954667210578918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,48,8,128,1,float16,fp8,0,0.012645332763592402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,0,0.020138667275508244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,0,0.0229066660006841
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,0,0.027461332579453785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,float16,0,0.0784693310658137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,0,0.02550400048494339
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,48,48,128,1,float16,fp8,0,0.06085866689682007
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,0,0.015376000354687372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,0,0.019274666905403137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,0,0.017184000462293625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,float16,0,0.04340266684691111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,48,48,128,1,float16,fp8,0,0.03414933383464813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,0,0.015168000012636185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,0,0.013269333789745966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,float16,0,0.02454400062561035
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,48,48,128,1,float16,fp8,0,0.02342933416366577
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,0,0.010954666882753372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,0,0.01108266661564509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,float16,0,0.017301333447297413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,48,48,128,1,float16,fp8,0,0.019146667172511418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,0,0.010757333288590113
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,0,0.010677333921194077
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,48,48,128,1,float16,float16,0,0.015130666395028433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,0,0.010762666662534079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,0,0.010773333410422007
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,0,0.010821333775917688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,0,0.010735999792814255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,48,48,128,1,float16,fp8,0,0.013701333353916803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,0,0.010634666929642359
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,0,0.010464000205198923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,0,0.01071999967098236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,48,48,128,1,float16,float16,0,0.010885333021481832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,0,0.010703999549150467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,0,0.010826667149861654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,0,0.010741333166758219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,float16,0,0.011194666226704916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,48,48,128,1,float16,fp8,0,0.013162666310866674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,0,0.010618666807810465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,0,0.011168000598748526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,0,0.00922133338948091
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,0,0.01081066702802976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,fp8,0,14.963578542073568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,float16,0,24.232533772786457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,40,1,128,1,float16,float16,0,23.446665445963543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,40,2,128,1,float16,fp8,0,14.108464558919271
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,fp8,0,14.089883168538412
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,fp8,0,14.433999379475912
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,40,4,128,1,float16,float16,0,26.504656473795574
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,40,8,128,1,float16,float16,0,23.68298594156901
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,fp8,0,7.8380692799886065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,40,40,128,1,float16,float16,0,11.959973653157553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,fp8,0,7.434165318806966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,40,1,128,1,float16,float16,0,15.088101704915365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,fp8,0,7.049456278483073
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,40,2,128,1,float16,float16,0,10.906351725260416
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,fp8,0,8.174293518066406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,40,4,128,1,float16,float16,0,12.644490559895834
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,fp8,0,7.131317138671875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,float16,0,4.8433332443237305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,40,8,128,1,float16,float16,0,13.039839426676432
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,40,40,128,1,float16,fp8,0,4.016384124755859
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,float16,0,3.856687863667806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,40,1,128,1,float16,fp8,0,3.358432133992513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,fp8,0,3.384245236714681
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,40,2,128,1,float16,float16,0,5.275034586588542
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,fp8,0,3.545381228129069
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,40,4,128,1,float16,float16,0,5.327797253926595
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,float16,0,4.8358612060546875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,40,8,128,1,float16,fp8,0,3.705018679300944
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,float16,0,2.1938506762186685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,float16,0,2.307744026184082
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,40,40,128,1,float16,fp8,0,2.2596640586853027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,40,1,128,1,float16,fp8,0,1.7371946970621746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,float16,0,2.2532639503479004
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,40,2,128,1,float16,fp8,0,1.9225014050801594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,fp8,0,1.8576374053955078
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,40,4,128,1,float16,float16,0,2.100330670674642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,fp8,0,1.8314026196797688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,40,8,128,1,float16,float16,0,2.423744042714437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,fp8,0,8.705626805623373
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,fp8,0,7.796464284261067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,40,1,128,1,float16,float16,0,14.918351491292318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,40,2,128,1,float16,float16,0,15.47699228922526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,fp8,0,8.291407903035482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,40,4,128,1,float16,float16,0,15.079461415608725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,fp8,0,8.992287953694662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,40,8,128,1,float16,float16,0,14.202415466308594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,float16,0,6.181264241536458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,40,40,128,1,float16,fp8,0,4.806501388549805
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,float16,0,5.540485382080078
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,40,1,128,1,float16,fp8,0,3.8315200805664062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,float16,0,6.3459517161051435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,40,2,128,1,float16,fp8,0,3.8459625244140625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,fp8,0,4.139941215515137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,40,4,128,1,float16,float16,0,6.322432200113933
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,fp8,0,4.09445317586263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,40,8,128,1,float16,float16,0,6.064602533976237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,float16,0,2.641690731048584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,40,40,128,1,float16,fp8,0,2.470341364542643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,float16,0,2.2504266103108725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,40,1,128,1,float16,fp8,0,1.9781333605448406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,fp8,0,1.977776050567627
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,40,2,128,1,float16,float16,0,2.2726453145345054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,float16,0,2.4076587359110513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,40,4,128,1,float16,fp8,0,2.1300907135009766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,fp8,0,2.1069706281026206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,40,8,128,1,float16,float16,0,2.614112059275309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,fp8,0,1.2959840297698975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,float16,0,1.2055520216623943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,40,40,128,1,float16,float16,0,1.3817013104756672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,40,1,128,1,float16,fp8,0,1.1611200173695881
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,fp8,0,1.0594666798909504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,40,2,128,1,float16,float16,0,1.2171626885732014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,float16,0,1.2736106713612874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,40,4,128,1,float16,fp8,0,1.1134453614552815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,fp8,0,1.121893326441447
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,40,8,128,1,float16,float16,0,1.2589066823323567
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,float16,0,6.169104258219401
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,40,1,128,1,float16,fp8,0,6.0106455485026045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,fp8,0,5.499477386474609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,40,2,128,1,float16,float16,0,9.055728276570639
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,fp8,0,6.208010355631511
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,40,4,128,1,float16,float16,0,9.02023442586263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,float16,0,8.290026982625326
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,40,8,128,1,float16,fp8,0,5.836570739746094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,float16,0,3.886101404825846
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,fp8,0,2.755429267883301
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,40,40,128,1,float16,fp8,0,3.551583925882975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,40,1,128,1,float16,float16,0,4.546127955118815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,fp8,0,2.7845331827799478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,40,2,128,1,float16,float16,0,4.799770673116048
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,float16,0,3.5210774739583335
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,40,4,128,1,float16,fp8,0,2.9619626998901367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,float16,0,3.4787734349568686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,40,8,128,1,float16,fp8,0,3.047109285990397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,float16,0,1.9474186897277832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,float16,0,1.6316320101420085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,40,1,128,1,float16,fp8,0,1.5141706466674805
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,float16,0,1.6490772565205891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,40,40,128,1,float16,fp8,0,1.826240062713623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,40,2,128,1,float16,fp8,0,1.4673226674397786
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,float16,0,1.749578634897868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,40,4,128,1,float16,fp8,0,1.553749402364095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,fp8,0,1.5543146133422852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,40,8,128,1,float16,float16,0,1.7740586598714192
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,float16,0,0.8780746459960938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,40,1,128,1,float16,fp8,0,0.7648693720499674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,float16,0,1.0531199773152669
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,40,40,128,1,float16,fp8,0,0.9672640164693197
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,fp8,0,0.9416639804840088
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,40,2,128,1,float16,float16,0,0.8892640272776285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,float16,0,0.9335520267486572
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,40,4,128,1,float16,fp8,0,0.8237813313802084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,fp8,0,0.8163839975992838
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,40,8,128,1,float16,float16,0,0.9416106541951498
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,fp8,0,7.073061625162761
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,fp8,0,8.34333864847819
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,40,1,128,1,float16,float16,0,12.100458780924479
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,40,2,128,1,float16,float16,0,11.767951965332031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,fp8,0,8.331504185994467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,40,4,128,1,float16,float16,0,12.89861806233724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,fp8,0,7.80729611714681
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,40,8,128,1,float16,float16,0,12.607733408610025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,float16,0,4.049829483032227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,float16,0,6.772922515869141
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,40,1,128,1,float16,fp8,0,3.584101359049479
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,40,40,128,1,float16,fp8,0,4.905706723531087
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,fp8,0,3.6199092864990234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,float16,0,4.394805272420247
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,40,4,128,1,float16,fp8,0,4.020634651184082
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,40,2,128,1,float16,float16,0,6.284938812255859
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,float16,0,2.5888586044311523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,float16,0,4.791765213012695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,40,8,128,1,float16,fp8,0,3.9887145360310874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,40,40,128,1,float16,fp8,0,3.243765195210775
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,fp8,0,1.8213920593261719
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,40,1,128,1,float16,float16,0,2.0804160435994468
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,float16,0,2.1151466369628906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,40,2,128,1,float16,fp8,0,1.855552037556966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,fp8,0,2.0879359245300293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,40,4,128,1,float16,float16,0,2.3131945927937827
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,float16,0,2.236992041269938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,40,8,128,1,float16,fp8,0,2.0020267168680825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,float16,0,1.117024024327596
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,40,1,128,1,float16,fp8,0,0.948479970296224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,float16,0,1.351952075958252
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,40,40,128,1,float16,fp8,0,1.5213227272033691
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,float16,0,1.1274080276489258
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,40,2,128,1,float16,fp8,0,0.9701973597208658
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,fp8,0,1.052890698115031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,40,4,128,1,float16,float16,0,1.1862080097198486
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,float16,0,1.1869440078735352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,40,8,128,1,float16,fp8,0,1.0642080307006836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,float16,0,0.6095466613769531
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,40,1,128,1,float16,fp8,0,0.5206079880396525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,fp8,0,0.6886560122172037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,40,40,128,1,float16,float16,0,0.7300533453623453
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,float16,0,0.6160480181376139
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,40,2,128,1,float16,fp8,0,0.5317226648330688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,float16,0,0.6422293186187744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,40,4,128,1,float16,fp8,0,0.5819520155588785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,fp8,0,0.5799520015716553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,40,8,128,1,float16,float16,0,0.6389439900716146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,float16,0,5.190554618835449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,40,1,128,1,float16,fp8,0,4.231007893880208
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,fp8,0,4.30244795481364
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,40,2,128,1,float16,float16,0,4.781050682067871
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,fp8,0,4.829087893168132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,float16,0,5.46994145711263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,40,4,128,1,float16,float16,0,5.722527821858724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,40,8,128,1,float16,fp8,0,4.6910826365153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,fp8,0,2.112874666849772
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,40,1,128,1,float16,float16,0,2.571839968363444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,float16,0,3.253397305806478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,40,40,128,1,float16,fp8,0,3.146090825398763
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,float16,0,2.45305601755778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,40,2,128,1,float16,fp8,0,2.4760799407958984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,float16,0,2.983541488647461
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,40,4,128,1,float16,fp8,0,2.3815199534098306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,float16,0,2.688495953877767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,40,8,128,1,float16,fp8,0,2.466261386871338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,float16,0,1.6784480412801106
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,40,40,128,1,float16,fp8,0,1.6099519729614258
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,fp8,0,1.1038560072580974
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,40,1,128,1,float16,float16,0,1.4579359690348308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,fp8,0,1.1387680371602376
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,40,2,128,1,float16,float16,0,1.315066655476888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,float16,0,1.391050656636556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,40,4,128,1,float16,fp8,0,1.3904852867126465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,float16,0,1.3730346361796062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,40,8,128,1,float16,fp8,0,1.2789866924285889
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,float16,0,0.8594613075256348
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,float16,0,0.6812480290730795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,40,1,128,1,float16,fp8,0,0.5869653224945068
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,40,40,128,1,float16,fp8,0,0.8390613396962484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,float16,0,0.6952586968739828
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,40,2,128,1,float16,fp8,0,0.6080106496810913
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,fp8,0,0.6567999919255575
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,40,4,128,1,float16,float16,0,0.7470933596293131
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,float16,0,0.733680009841919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,40,8,128,1,float16,fp8,0,0.6615466674168905
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,float16,0,0.3768959840138753
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,float16,0,0.47602665424346924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,40,1,128,1,float16,fp8,0,0.3232640027999878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,40,40,128,1,float16,fp8,0,0.45508265495300293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,float16,0,0.3816853364308675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,40,2,128,1,float16,fp8,0,0.33076266447703045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,float16,0,0.410645325978597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,40,4,128,1,float16,fp8,0,0.3673066695531209
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,fp8,0,0.36615467071533203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,40,8,128,1,float16,float16,0,0.407589316368103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,fp8,0,4.044858614603679
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,40,1,128,1,float16,float16,0,4.634976069132487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,fp8,0,4.170976003011067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,40,2,128,1,float16,float16,0,5.115930557250977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,float16,0,5.497055689493815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,40,4,128,1,float16,fp8,0,4.9334719975789385
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,fp8,0,4.759093284606934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,40,8,128,1,float16,float16,0,6.510058720906575
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,float16,0,2.3376320203145347
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,40,1,128,1,float16,fp8,0,2.0321173667907715
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,float16,0,3.3342933654785156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,40,40,128,1,float16,fp8,0,3.3985280990600586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,float16,0,2.5107946395874023
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,40,2,128,1,float16,fp8,0,2.1129813194274902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,float16,0,2.640234629313151
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,40,4,128,1,float16,fp8,0,2.5193653106689453
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,float16,0,2.6613173484802246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,40,8,128,1,float16,fp8,0,2.5067572593688965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,float16,0,1.7146186828613281
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,40,40,128,1,float16,fp8,0,1.729904015858968
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,fp8,0,1.0446613629659016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,40,1,128,1,float16,float16,0,1.553376038869222
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,fp8,0,1.0972853501637776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,40,2,128,1,float16,float16,0,1.2293226718902588
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,fp8,0,1.226746638615926
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,40,4,128,1,float16,float16,0,1.3816693623860676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,fp8,0,1.2405280272165935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,40,8,128,1,float16,float16,0,1.3780746459960938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,float16,0,0.6328213214874268
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,float16,0,0.8999199867248535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,40,40,128,1,float16,fp8,0,0.8936746915181478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,40,1,128,1,float16,fp8,0,0.5539146661758423
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,float16,0,0.6482133467992147
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,fp8,0,0.6582719882329305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,40,2,128,1,float16,fp8,0,0.5837013324101766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,40,4,128,1,float16,float16,0,0.7317866484324137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,float16,0,0.7108000119527181
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,40,8,128,1,float16,fp8,0,0.6553279956181844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,float16,0,0.34649066130320233
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,40,1,128,1,float16,fp8,0,0.3062826593716939
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,fp8,0,0.47386133670806885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,40,40,128,1,float16,float16,0,0.4865119854609172
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,float16,0,0.3549813429514567
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,40,2,128,1,float16,fp8,0,0.3171146710713704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,float16,0,0.39633599917093915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,float16,0,0.3987199862798055
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,40,4,128,1,float16,fp8,0,0.3598346710205078
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,40,8,128,1,float16,fp8,0,0.3596533139546712
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,float16,0,0.19285333156585693
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,40,1,128,1,float16,fp8,0,0.17195733388264975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,float16,0,0.19579199949900308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,fp8,0,0.26288533210754395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,40,2,128,1,float16,fp8,0,0.17709867159525552
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,40,40,128,1,float16,float16,0,0.2782026727994283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,float16,0,0.2105706731478373
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,40,4,128,1,float16,fp8,0,0.19362133741378784
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,fp8,0,0.19370132684707642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,40,8,128,1,float16,float16,0,0.21781333287556967
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,fp8,0,2.4722827275594077
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,40,1,128,1,float16,float16,0,2.802687962849935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,float16,0,2.809328079223633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,40,2,128,1,float16,fp8,0,2.5830507278442383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,float16,0,3.3369547526041665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,fp8,0,3.0686985651652017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,40,4,128,1,float16,fp8,0,3.1516745885213218
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,40,8,128,1,float16,float16,0,3.198479970296224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,fp8,0,1.2537706693013508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,40,1,128,1,float16,float16,0,1.434666633605957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,float16,0,2.1965813636779785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,40,40,128,1,float16,fp8,0,2.2774346669514975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,float16,0,1.4494986534118652
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,40,2,128,1,float16,fp8,0,1.3183733622233074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,float16,0,1.6821386019388835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,40,4,128,1,float16,fp8,0,1.56550931930542
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,float16,0,1.6806666056315105
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,40,8,128,1,float16,fp8,0,1.558234691619873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,fp8,0,0.660261352856954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,40,1,128,1,float16,float16,0,0.7365012963612875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,float16,0,0.7748479843139648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,fp8,0,1.1631519794464111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,40,40,128,1,float16,float16,0,1.1305546760559082
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,40,2,128,1,float16,fp8,0,0.6834932963053385
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,fp8,0,0.8056746323903402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,40,4,128,1,float16,float16,0,0.8792479832967123
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,float16,0,0.8663679758707682
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,40,8,128,1,float16,fp8,0,0.7966079711914062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,float16,0,0.5974826812744141
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,float16,0,0.3959893385569255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,float16,0,0.41679465770721436
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,40,40,128,1,float16,fp8,0,0.6061919927597046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,40,1,128,1,float16,fp8,0,0.3529386520385742
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,40,2,128,1,float16,fp8,0,0.37300264835357666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,float16,0,0.46650667985280353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,40,4,128,1,float16,fp8,0,0.44485334555308026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,float16,0,0.46376534303029376
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,40,8,128,1,float16,fp8,0,0.43167467912038165
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,float16,0,0.22016533215840658
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,40,1,128,1,float16,fp8,0,0.19368000825246176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,float16,0,0.33510398864746094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,40,40,128,1,float16,fp8,0,0.3274773359298706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,fp8,0,0.19885865847269693
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,40,2,128,1,float16,float16,0,0.2271946668624878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,float16,0,0.25674132506052655
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,40,4,128,1,float16,fp8,0,0.23899734020233154
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,float16,0,0.2526613275210063
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,40,8,128,1,float16,fp8,0,0.23962666591008505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,float16,0,0.12527466813723245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,40,1,128,1,float16,fp8,0,0.11421866218249004
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,float16,0,0.19474667310714722
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,float16,0,0.12813333670298258
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,40,40,128,1,float16,fp8,0,0.18383467197418213
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,40,2,128,1,float16,fp8,0,0.1174720029036204
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,float16,0,0.13784533739089966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,40,4,128,1,float16,fp8,0,0.12894933422406515
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,float16,0,0.1378613313039144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,40,8,128,1,float16,fp8,0,0.13009599844614664
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,float16,0,2.7225866317749023
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,40,1,128,1,float16,fp8,0,2.4881866772969565
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,float16,0,2.8683627446492515
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,40,2,128,1,float16,fp8,0,2.6566239992777505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,fp8,0,3.087045351664225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,40,4,128,1,float16,float16,0,3.4365545908610025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,float16,0,3.431141217549642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,40,8,128,1,float16,fp8,0,3.2287572224934897
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,float16,0,1.4095519383748372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,fp8,0,2.6306559244791665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,40,40,128,1,float16,float16,0,2.4826186498006186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,40,1,128,1,float16,fp8,0,1.273045301437378
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,float16,0,1.476794719696045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,40,2,128,1,float16,fp8,0,1.35263458887736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,float16,0,1.7751839955647786
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,40,4,128,1,float16,fp8,0,1.748458703358968
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,float16,0,1.7635092735290527
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,40,8,128,1,float16,fp8,0,1.6431999206542969
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,float16,0,0.7327253023783366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,fp8,0,1.3379626274108887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,40,40,128,1,float16,float16,0,1.268613338470459
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,40,1,128,1,float16,fp8,0,0.662559986114502
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,float16,0,0.775871992111206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,40,2,128,1,float16,fp8,0,0.7031413714090983
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,float16,0,0.9259413083394369
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,40,4,128,1,float16,fp8,0,0.8328586419423422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,float16,0,0.893285353978475
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,40,8,128,1,float16,fp8,0,0.9043839772542318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,float16,0,0.39105598131815594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,float16,0,0.6610293388366699
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,40,40,128,1,float16,fp8,0,0.6884533564249674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,40,1,128,1,float16,fp8,0,0.34667734305063885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,float16,0,0.40748266379038495
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,40,2,128,1,float16,fp8,0,0.37328000863393146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,float16,0,0.49483199914296466
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,40,4,128,1,float16,fp8,0,0.4452960093816121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,float16,0,0.49675198396046955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,40,8,128,1,float16,fp8,0,0.45157865683237713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,float16,0,0.2149440050125122
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,40,1,128,1,float16,fp8,0,0.192522664864858
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,fp8,0,0.36181334654490155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,40,40,128,1,float16,float16,0,0.3601653178532918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,float16,0,0.2230400045712789
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,40,2,128,1,float16,fp8,0,0.20355733235677084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,float16,0,0.25840532779693604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,40,4,128,1,float16,fp8,0,0.2497333288192749
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,float16,0,0.2661813298861186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,40,8,128,1,float16,fp8,0,0.24810665845870972
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,float16,0,0.1174720029036204
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,40,1,128,1,float16,fp8,0,0.10550933082898457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,float16,0,0.12160000205039978
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,40,2,128,1,float16,fp8,0,0.11188266674677531
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,float16,0,0.20721600453058878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,40,40,128,1,float16,fp8,0,0.198362668355306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,float16,0,0.1349013348420461
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,40,4,128,1,float16,fp8,0,0.12863999605178833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,float16,0,0.14293866356213888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,40,8,128,1,float16,fp8,0,0.12893333037694296
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,float16,0,0.07029866675535838
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,40,1,128,1,float16,fp8,0,0.06654933094978333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,float16,0,0.07214400172233582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,float16,0,0.11689066886901855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,40,2,128,1,float16,fp8,0,0.06866666674613953
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,40,40,128,1,float16,fp8,0,0.10415466626485188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,float16,0,0.07773333291212718
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,40,4,128,1,float16,fp8,0,0.07709866762161255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,float16,0,0.07926933467388153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,40,8,128,1,float16,fp8,0,0.07713599999745686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,float16,0,1.731642723083496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,40,1,128,1,float16,fp8,0,1.5919893582661946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,float16,0,1.8352746963500977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,40,2,128,1,float16,fp8,0,1.7252747217814128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,float16,0,2.340336004892985
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,40,4,128,1,float16,fp8,0,2.2526453336079917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,fp8,0,2.140000025431315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,40,8,128,1,float16,float16,0,2.2174933751424155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,float16,0,1.688037395477295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,float16,0,0.9146453539530436
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,40,1,128,1,float16,fp8,0,0.8180106480916342
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,40,40,128,1,float16,fp8,0,1.8432639439900715
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,float16,0,0.952725330988566
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,40,2,128,1,float16,fp8,0,0.8792533079783121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,float16,0,1.2076959609985352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,float16,0,1.1538293361663818
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,40,4,128,1,float16,fp8,0,1.0785226821899414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,40,8,128,1,float16,fp8,0,1.1245280106862385
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,float16,0,0.4755733410517375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,float16,0,0.8662880261739095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,40,1,128,1,float16,fp8,0,0.4304800033569336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,40,40,128,1,float16,fp8,0,0.938437302907308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,float16,0,0.5036106506983439
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,40,2,128,1,float16,fp8,0,0.46133331457773846
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,float16,0,0.6200960079828898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,40,4,128,1,float16,fp8,0,0.589136004447937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,float16,0,0.6292800108591715
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,40,8,128,1,float16,fp8,0,0.5908213456471761
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,float16,0,0.255349338054657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,float16,0,0.4675306479136149
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,40,1,128,1,float16,fp8,0,0.2319200038909912
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,40,40,128,1,float16,fp8,0,0.48554134368896484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,float16,0,0.2702133258183797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,40,2,128,1,float16,fp8,0,0.25275200605392456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,float16,0,0.3344106674194336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,40,4,128,1,float16,fp8,0,0.31835200389226276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,float16,0,0.3273013234138489
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,float16,0,0.14131733775138855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,40,8,128,1,float16,fp8,0,0.3081173300743103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,40,1,128,1,float16,fp8,0,0.12531200051307678
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,float16,0,0.26254934072494507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,float16,0,0.14972800016403198
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,40,40,128,1,float16,fp8,0,0.25941866636276245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,40,2,128,1,float16,fp8,0,0.13317867120107016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,float16,0,0.17930134137471518
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,40,4,128,1,float16,fp8,0,0.17093332608540854
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,float16,0,0.17683732509613037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,float16,0,0.0784693310658137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,40,8,128,1,float16,fp8,0,0.17312532663345337
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,40,1,128,1,float16,fp8,0,0.07266133526961009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,float16,0,0.08141333361466725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,float16,0,0.14818132917086282
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,40,40,128,1,float16,fp8,0,0.14387200276056925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,40,2,128,1,float16,fp8,0,0.07781333227952321
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,float16,0,0.09090666969617207
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,fp8,0,0.09132267038027446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,40,8,128,1,float16,float16,0,0.09125333031018575
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,40,4,128,1,float16,fp8,0,0.08944533268610637
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,fp8,0,0.049786667029062905
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,40,1,128,1,float16,float16,0,0.052202666799227394
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,float16,0,0.05445333321889242
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,float16,0,0.07902400195598602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,40,2,128,1,float16,fp8,0,0.05064000189304352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,40,40,128,1,float16,fp8,0,0.07692799965540568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,float16,0,0.05825066566467285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,40,4,128,1,float16,fp8,0,0.058549334605534874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,float16,0,0.057904000083605446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,40,8,128,1,float16,fp8,0,0.05797333518664042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,float16,0,1.866410732269287
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,40,1,128,1,float16,fp8,0,1.7440320650736492
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,float16,0,2.0222934087117515
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,40,2,128,1,float16,fp8,0,1.8994080225626628
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,float16,0,2.671658515930176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,40,4,128,1,float16,fp8,0,2.351967970530192
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,fp8,0,2.461653391520182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,40,8,128,1,float16,float16,0,2.533573309580485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,float16,0,0.9630346298217773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,40,1,128,1,float16,fp8,0,0.8829867045084635
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,float16,0,1.035317341486613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,fp8,0,2.2455573081970215
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,40,40,128,1,float16,float16,0,2.00652805964152
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,40,2,128,1,float16,fp8,0,0.957157293955485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,float16,0,1.3483093579610188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,fp8,0,1.3556586901346843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,40,8,128,1,float16,float16,0,1.344490687052409
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,40,4,128,1,float16,fp8,0,1.355178674062093
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,float16,0,0.5118506749471029
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,40,1,128,1,float16,fp8,0,0.4601759910583496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,float16,0,0.5518506765365601
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,fp8,0,1.1369919776916504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,40,40,128,1,float16,float16,0,1.0282560189565022
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,40,2,128,1,float16,fp8,0,0.5046133200327555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,float16,0,0.6885546843210856
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,float16,0,0.683456023534139
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,40,8,128,1,float16,fp8,0,0.6501760085423788
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,40,4,128,1,float16,fp8,0,0.660207986831665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,float16,0,0.2668000062306722
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,40,1,128,1,float16,fp8,0,0.2414720058441162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,fp8,0,0.5864959955215454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,40,40,128,1,float16,float16,0,0.5308586756388346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,float16,0,0.29155733187993366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,40,2,128,1,float16,fp8,0,0.2648853262265523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,float16,0,0.38051732381184894
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,40,4,128,1,float16,fp8,0,0.37034134070078534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,float16,0,0.3508426745732625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,40,8,128,1,float16,fp8,0,0.34622931480407715
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,float16,0,0.14843733112017313
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,float16,0,0.278602659702301
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,40,40,128,1,float16,fp8,0,0.3067786693572998
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,40,1,128,1,float16,fp8,0,0.13548800349235535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,float16,0,0.15879467129707336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,40,2,128,1,float16,fp8,0,0.14852266510327658
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,float16,0,0.1930933396021525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,40,4,128,1,float16,fp8,0,0.18535999457041422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,float16,0,0.20090667406717935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,40,8,128,1,float16,fp8,0,0.1859253247578939
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,float16,0,0.08085866769154866
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,40,1,128,1,float16,fp8,0,0.07330133517583211
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,float16,0,0.08382399876912434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,40,2,128,1,float16,fp8,0,0.07887466748555501
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,fp8,0,0.16536532839139303
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,float16,0,0.10231467088063557
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,40,40,128,1,float16,float16,0,0.1514293352762858
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,40,4,128,1,float16,fp8,0,0.09372799595197041
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,fp8,0,0.09689066807428996
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,40,8,128,1,float16,float16,0,0.10458667079607646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,float16,0,0.050010666251182556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,float16,0,0.0913706620534261
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,40,1,128,1,float16,fp8,0,0.04587733248869578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,float16,0,0.050623998045921326
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,40,40,128,1,float16,fp8,0,0.08203200002511342
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,fp8,0,0.05629333357016245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,40,4,128,1,float16,float16,0,0.056976000467936196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,40,2,128,1,float16,fp8,0,0.04804799954096476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,float16,0,0.05604266623655955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,float16,0,0.03585600107908249
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,40,8,128,1,float16,fp8,0,0.05596800148487091
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,40,1,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,float16,0,0.03743999948104223
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,fp8,0,0.052042668064435325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,40,2,128,1,float16,fp8,0,0.035445332527160645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,float16,0,0.03979733337958654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,40,40,128,1,float16,float16,0,0.04552533229192098
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,fp8,0,0.03972266614437103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,40,4,128,1,float16,fp8,0,0.03940266619126002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,40,8,128,1,float16,float16,0,0.03955200066169103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,fp8,0,1.3533387184143066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,40,1,128,1,float16,float16,0,1.4487199783325195
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,float16,0,1.5842560132344563
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,40,2,128,1,float16,fp8,0,1.5006133715311687
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,fp8,0,1.9586666425069172
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,40,4,128,1,float16,float16,0,2.1343839963277182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,float16,0,2.19812806447347
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,40,8,128,1,float16,fp8,0,2.0735839207967124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,float16,0,1.777440071105957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,40,40,128,1,float16,fp8,0,2.042517344156901
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,fp8,0,0.6811359723409017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,40,1,128,1,float16,float16,0,0.7436800003051758
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,float16,0,0.8097439606984457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,40,2,128,1,float16,fp8,0,0.7696106433868408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,float16,0,1.1065759658813477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,40,4,128,1,float16,fp8,0,1.01255997021993
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,float16,0,1.1507253646850586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,float16,0,0.9154079755147299
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,40,8,128,1,float16,fp8,0,1.0734879970550537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,40,40,128,1,float16,fp8,0,1.0335946877797444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,fp8,0,0.3568106492360433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,40,1,128,1,float16,float16,0,0.39168532689412433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,fp8,0,0.40297067165374756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,40,2,128,1,float16,float16,0,0.4336213270823161
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,fp8,0,0.5285333395004272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,40,4,128,1,float16,float16,0,0.5757493178049723
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,float16,0,0.5852320194244385
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,40,8,128,1,float16,fp8,0,0.5835306644439697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,float16,0,0.21206400791803995
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,40,1,128,1,float16,fp8,0,0.1904533306757609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,fp8,0,0.5331466595331827
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,40,40,128,1,float16,float16,0,0.46986667315165204
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,float16,0,0.23063466946283975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,40,2,128,1,float16,fp8,0,0.21719467639923096
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,float16,0,0.31276265780131024
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,40,4,128,1,float16,fp8,0,0.3193440039952596
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,float16,0,0.2896053393681844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,40,8,128,1,float16,fp8,0,0.2882026632626851
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,float16,0,0.11514133214950562
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,40,1,128,1,float16,fp8,0,0.10737066467603047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,fp8,0,0.27535466353098553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,40,40,128,1,float16,float16,0,0.24553600947062174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,float16,0,0.12173866232236226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,40,2,128,1,float16,fp8,0,0.116949329773585
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,float16,0,0.17022399107615152
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,40,4,128,1,float16,fp8,0,0.16155733664830527
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,float16,0,0.15703466534614563
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,40,8,128,1,float16,fp8,0,0.16845866044362387
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,float16,0,0.061946665247281395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,40,1,128,1,float16,fp8,0,0.05832533538341522
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,fp8,0,0.15013866623242697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,40,40,128,1,float16,float16,0,0.13261866569519043
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,float16,0,0.0664213349421819
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,40,2,128,1,float16,fp8,0,0.06325866778691609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,float16,0,0.08833600083986919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,40,4,128,1,float16,fp8,0,0.0784853349129359
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,fp8,0,0.0805866668621699
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,40,8,128,1,float16,float16,0,0.08549333612124126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,float16,0,0.039306665460268654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,40,1,128,1,float16,fp8,0,0.037589333951473236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,float16,0,0.07323200007279713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,40,40,128,1,float16,fp8,0,0.07498666644096375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,float16,0,0.04045333216587702
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,40,2,128,1,float16,fp8,0,0.03947199881076813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,float16,0,0.0458133320013682
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,40,4,128,1,float16,fp8,0,0.04604266583919525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,fp8,0,0.04769066472848257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,40,8,128,1,float16,float16,0,0.0462666650613149
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,float16,0,0.029461334149042766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,40,1,128,1,float16,fp8,0,0.0276053324341774
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,float16,0,0.0378506655494372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,40,40,128,1,float16,fp8,0,0.046207999189694725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,float16,0,0.030671998858451843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,40,2,128,1,float16,fp8,0,0.02942933390537898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,float16,0,0.03254399945338567
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,40,4,128,1,float16,fp8,0,0.03336533407370249
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,fp8,0,0.03356799980004629
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,40,8,128,1,float16,float16,0,0.03266133368015289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,float16,0,0.0210506667693456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,40,1,128,1,float16,fp8,0,0.021168000996112823
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,float16,0,0.02117866774400075
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,40,2,128,1,float16,fp8,0,0.02102400114138921
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,float16,0,0.025472000241279602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,40,40,128,1,float16,fp8,0,0.029578665892283123
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,float16,0,0.022965334355831146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,40,4,128,1,float16,fp8,0,0.02295999974012375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,float16,0,0.023365333676338196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,40,8,128,1,float16,fp8,0,0.023402666052182514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,float16,0,0.6123093366622925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,40,1,128,1,float16,fp8,0,0.5773760080337524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,float16,0,0.6850666999816895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,40,2,128,1,float16,fp8,0,0.6549866596857706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,float16,0,0.9998346964518229
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,40,4,128,1,float16,fp8,0,1.0500000317891438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,float16,0,1.0192480087280273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,40,8,128,1,float16,fp8,0,0.9422612984975179
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,float16,0,0.31806399424870807
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,40,1,128,1,float16,fp8,0,0.29602134227752686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,float16,0,0.8404373327891032
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,40,40,128,1,float16,fp8,0,0.9772746562957764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,float16,0,0.3620213270187378
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,40,2,128,1,float16,fp8,0,0.34250664710998535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,float16,0,0.4927519957224528
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,40,4,128,1,float16,fp8,0,0.47273067633310956
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,float16,0,0.5033386548360189
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,40,8,128,1,float16,fp8,0,0.5282880067825317
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,float16,0,0.43080000082651776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,40,40,128,1,float16,fp8,0,0.5000746647516886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,float16,0,0.1679733395576477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,40,1,128,1,float16,fp8,0,0.15763200322786966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,float16,0,0.19300800561904907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,40,2,128,1,float16,fp8,0,0.18402665853500366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,float16,0,0.2715253432591756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,40,4,128,1,float16,fp8,0,0.28059200445810956
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,float16,0,0.2530826727549235
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,40,8,128,1,float16,fp8,0,0.2536906599998474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,float16,0,0.0934826632340749
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,float16,0,0.2265546719233195
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,40,1,128,1,float16,fp8,0,0.08949333429336548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,40,40,128,1,float16,fp8,0,0.2602826754252116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,float16,0,0.1051626702149709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,float16,0,0.13794666528701782
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,40,2,128,1,float16,fp8,0,0.10245866576830547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,40,4,128,1,float16,fp8,0,0.14044800400733948
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,float16,0,0.1453333298365275
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,40,8,128,1,float16,fp8,0,0.1397119959195455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,float16,0,0.05386666456858317
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,float16,0,0.12410133083661397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,float16,0,0.05620799958705902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,40,1,128,1,float16,fp8,0,0.05068266888459524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,40,40,128,1,float16,fp8,0,0.14145066340764365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,fp8,0,0.07052800059318542
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,40,4,128,1,float16,float16,0,0.07277333239714305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,40,2,128,1,float16,fp8,0,0.05597866574923197
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,float16,0,0.0767680009206136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,40,8,128,1,float16,fp8,0,0.07216000060240428
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,float16,0,0.03555200000603994
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,40,1,128,1,float16,fp8,0,0.03339199970165888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,fp8,0,0.07252266506354015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,float16,0,0.03755199909210205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,40,40,128,1,float16,float16,0,0.07030400137106578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,40,2,128,1,float16,fp8,0,0.035562666753927864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,fp8,0,0.04367466767628988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,40,4,128,1,float16,float16,0,0.041749333341916404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,float16,0,0.04185600082079569
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,40,8,128,1,float16,fp8,0,0.04362666606903076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,float16,0,0.025274666647116344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,40,1,128,1,float16,fp8,0,0.025248001019159954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,fp8,0,0.04357333481311798
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,float16,0,0.0271573339899381
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,40,40,128,1,float16,float16,0,0.0353973334034284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,40,2,128,1,float16,fp8,0,0.027232001225153606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,float16,0,0.02935466667016347
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,40,4,128,1,float16,fp8,0,0.029578665892283123
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,fp8,0,0.030906667311986286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,40,8,128,1,float16,float16,0,0.029482667644818623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,float16,0,0.01916266605257988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,40,1,128,1,float16,fp8,0,0.01852799952030182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,float16,0,0.018789333601792652
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,float16,0,0.023157333334287006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,40,40,128,1,float16,fp8,0,0.02735466758410136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,40,2,128,1,float16,fp8,0,0.01741333305835724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,float16,0,0.01971199984351794
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,40,4,128,1,float16,fp8,0,0.019130667050679524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,float16,0,0.019487999379634857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,40,8,128,1,float16,fp8,0,0.021317332983016968
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,float16,0,0.017338667064905167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,fp8,0,0.021146667500336964
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,40,1,128,1,float16,fp8,0,0.017525333911180496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,40,40,128,1,float16,float16,0,0.019280000279347103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,float16,0,0.017850667238235474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,40,2,128,1,float16,fp8,0,0.01748266691962878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,float16,0,0.017231999586025875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,40,4,128,1,float16,fp8,0,0.017557332913080852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,fp8,0,0.01711999997496605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,40,8,128,1,float16,float16,0,0.0174346665541331
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,float16,0,0.3712799946467082
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,40,1,128,1,float16,fp8,0,0.34333332379659015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,float16,0,0.41226665178934735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,40,2,128,1,float16,fp8,0,0.38811735312143963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,float16,0,0.576527992884318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,40,4,128,1,float16,fp8,0,0.5199840068817139
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,float16,0,0.5661760171254476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,fp8,0,0.5212159951527914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,float16,0,0.19542400042215982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,40,40,128,1,float16,float16,0,0.456928014755249
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,40,8,128,1,float16,fp8,0,0.560421347618103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,40,1,128,1,float16,fp8,0,0.18256000677744547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,float16,0,0.21320533752441406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,40,2,128,1,float16,fp8,0,0.20768000682195029
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,float16,0,0.2981226642926534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,40,4,128,1,float16,fp8,0,0.3049333294232686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,float16,0,0.27698665857315063
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,40,8,128,1,float16,fp8,0,0.2786453366279602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,float16,0,0.10749866565068562
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,40,1,128,1,float16,fp8,0,0.09937066833178203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,float16,0,0.23427732785542807
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,40,40,128,1,float16,fp8,0,0.2691733241081238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,float16,0,0.11321066816647847
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,40,2,128,1,float16,fp8,0,0.11012799541155498
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,float16,0,0.1548853317896525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,40,4,128,1,float16,fp8,0,0.15350932876269022
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,fp8,0,0.16082666317621866
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,40,8,128,1,float16,float16,0,0.14800000190734863
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,float16,0,0.12558933099110922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,float16,0,0.057002668579419456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,40,40,128,1,float16,fp8,0,0.1441333293914795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,40,1,128,1,float16,fp8,0,0.05413866539796194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,float16,0,0.06250666578610738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,40,2,128,1,float16,fp8,0,0.058320000767707825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,float16,0,0.07539199789365132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,40,4,128,1,float16,fp8,0,0.07455466687679291
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,fp8,0,0.07494399944941203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,40,8,128,1,float16,float16,0,0.0839413305123647
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,float16,0,0.03443199892838796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,40,1,128,1,float16,fp8,0,0.033173332611719765
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,float16,0,0.06833066542943318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,40,40,128,1,float16,fp8,0,0.07082666456699371
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,float16,0,0.03570666660865148
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,40,2,128,1,float16,fp8,0,0.03526933242877325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,float16,0,0.0415786678592364
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,float16,0,0.041834667325019836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,40,4,128,1,float16,fp8,0,0.043680002291997276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,40,8,128,1,float16,fp8,0,0.043712000052134194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,float16,0,0.023525332411130268
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,40,1,128,1,float16,fp8,0,0.023152001202106476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,float16,0,0.02518933266401291
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,40,2,128,1,float16,fp8,0,0.023520000278949738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,fp8,0,0.04153066625197729
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,float16,0,0.02741333345572154
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,40,40,128,1,float16,float16,0,0.03350399931271871
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,40,4,128,1,float16,fp8,0,0.027232001225153606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,float16,0,0.02770666778087616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,40,8,128,1,float16,fp8,0,0.029045333464940388
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,float16,0,0.019018666197856266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,40,1,128,1,float16,fp8,0,0.0180479995906353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,float16,0,0.01926400015751521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,float16,0,0.023418667415777843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,40,2,128,1,float16,fp8,0,0.01884799947341283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,40,40,128,1,float16,fp8,0,0.027242665489514668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,float16,0,0.019354666272799175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,40,4,128,1,float16,fp8,0,0.02041600023706754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,float16,0,0.01960533360640208
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,40,8,128,1,float16,fp8,0,0.02004266654451688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,float16,0,0.014015999933083853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,40,1,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,float16,0,0.01451733335852623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,40,2,128,1,float16,fp8,0,0.013253333667914072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,float16,0,0.016906666258970898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,40,40,128,1,float16,fp8,0,0.01844800015290578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,float16,0,0.014970666418472925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,40,4,128,1,float16,fp8,0,0.014943999548753103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,fp8,0,0.014831999937693277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,40,8,128,1,float16,float16,0,0.015024000157912573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,float16,0,0.013936000565687815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,40,1,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,float16,0,0.015295999745527903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,float16,0,0.0138026662170887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,40,40,128,1,float16,fp8,0,0.016986666868130367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,40,2,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,40,4,128,1,float16,fp8,0,0.014842666685581207
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,float16,0,0.013034666577974955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,40,8,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,float16,0,0.29090134302775067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,float16,0,0.31274133920669556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,40,1,128,1,float16,fp8,0,0.2616373300552368
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,fp8,0,0.35869332154591876
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,40,4,128,1,float16,float16,0,0.3964373270670573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,float16,0,0.3972959915796916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,40,2,128,1,float16,fp8,0,0.2800426681836446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,fp8,0,0.3082080086072286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,40,40,128,1,float16,float16,0,0.3025706609090169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,40,8,128,1,float16,fp8,0,0.37412798404693604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,float16,0,0.15572266777356467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,40,1,128,1,float16,fp8,0,0.14030933380126953
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,fp8,0,0.14830399552981058
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,40,2,128,1,float16,float16,0,0.1632373332977295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,float16,0,0.20143999656041464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,40,4,128,1,float16,fp8,0,0.19260799884796143
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,float16,0,0.20731200774510702
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,40,8,128,1,float16,fp8,0,0.18862400452295938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,float16,0,0.16328533490498862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,float16,0,0.08085866769154866
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,40,1,128,1,float16,fp8,0,0.07258133093516032
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,40,40,128,1,float16,fp8,0,0.16275733709335327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,fp8,0,0.07689066727956136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,float16,0,0.10530666510264079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,40,2,128,1,float16,float16,0,0.08758399883906047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,40,4,128,1,float16,fp8,0,0.0930560032526652
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,float16,0,0.1055573324362437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,40,8,128,1,float16,fp8,0,0.09627733627955119
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,float16,0,0.04572799801826477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,40,1,128,1,float16,fp8,0,0.0425546665986379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,float16,0,0.0876586635907491
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,40,40,128,1,float16,fp8,0,0.08178133269151051
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,float16,0,0.046480000019073486
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,40,2,128,1,float16,fp8,0,0.044079999128977455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,float16,0,0.05197866757710775
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,40,4,128,1,float16,fp8,0,0.05217599868774414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,float16,0,0.05233600238958994
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,40,8,128,1,float16,fp8,0,0.053317333261171974
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,float16,0,0.028463999430338543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,40,1,128,1,float16,fp8,0,0.027424000203609467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,float16,0,0.0295413335164388
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,fp8,0,0.04573333263397217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,40,40,128,1,float16,float16,0,0.03773866593837738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,40,2,128,1,float16,fp8,0,0.029285334050655365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,float16,0,0.03146133323510488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,40,4,128,1,float16,fp8,0,0.031680000325044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,float16,0,0.031504000226656594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,40,8,128,1,float16,fp8,0,0.0329066663980484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,fp8,0,0.019296000401178997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,40,1,128,1,float16,float16,0,0.019434666881958645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,float16,0,0.0210506667693456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,fp8,0,0.029285334050655365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,40,40,128,1,float16,float16,0,0.02510400116443634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,40,2,128,1,float16,fp8,0,0.019381333142518997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,fp8,0,0.021327999730904896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,40,4,128,1,float16,float16,0,0.021344001094500225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,fp8,0,0.02213866760333379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,40,8,128,1,float16,float16,0,0.021029333273569744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,float16,0,0.01643199970324834
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,40,1,128,1,float16,fp8,0,0.01632533346613248
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,float16,0,0.017018667111794155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,40,2,128,1,float16,fp8,0,0.01642666632930438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,fp8,0,0.02093333254257838
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,float16,0,0.016800000021855038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,40,40,128,1,float16,float16,0,0.01736533393462499
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,40,4,128,1,float16,fp8,0,0.017018667111794155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,float16,0,0.017045332739750545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,40,8,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,float16,0,0.013183999806642532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,40,1,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,float16,0,0.013183999806642532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,40,2,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,fp8,0,0.015130666395028433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,40,40,128,1,float16,float16,0,0.014794666320085526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,float16,0,0.013274667163689932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,40,4,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,40,8,128,1,float16,float16,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,40,1,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,40,2,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,fp8,0,0.015210667004187902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,float16,0,0.01322666679819425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,40,40,128,1,float16,float16,0,0.014896000425020853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,40,4,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,float16,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,40,8,128,1,float16,fp8,0,0.013237333546082178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,float16,0,0.2661280035972595
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,40,1,128,1,float16,fp8,0,0.22554133335749307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,float16,0,0.27958399057388306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,40,2,128,1,float16,fp8,0,0.2352480093638102
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,fp8,0,0.27875200907389325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,40,4,128,1,float16,float16,0,0.31493866443634033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,float16,0,0.3202720085779826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,fp8,0,0.20414400100708008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,40,40,128,1,float16,float16,0,0.2198186715443929
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,float16,0,0.1388746698697408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,40,8,128,1,float16,fp8,0,0.2895413239796956
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,40,1,128,1,float16,fp8,0,0.11470400293668111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,float16,0,0.14602133631706238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,40,2,128,1,float16,fp8,0,0.1197119951248169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,fp8,0,0.1365173359711965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,40,4,128,1,float16,float16,0,0.16547733545303345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,float16,0,0.1611199975013733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,40,8,128,1,float16,fp8,0,0.14257599910100302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,float16,0,0.07415999968846639
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,float16,0,0.11541333794593811
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,40,1,128,1,float16,fp8,0,0.06242666641871134
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,40,40,128,1,float16,fp8,0,0.10774399836858113
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,float16,0,0.07609066863854726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,float16,0,0.07982933521270752
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,40,2,128,1,float16,fp8,0,0.06457066535949707
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,40,4,128,1,float16,fp8,0,0.07309866448243459
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,float16,0,0.08083733419577281
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,40,8,128,1,float16,fp8,0,0.0727040022611618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,float16,0,0.04247466723124186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,fp8,0,0.056128000219662987
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,40,1,128,1,float16,fp8,0,0.036890665690104164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,40,40,128,1,float16,float16,0,0.05677866439024607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,float16,0,0.04368533194065094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,float16,0,0.04584000011285146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,40,2,128,1,float16,fp8,0,0.03822399924198786
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,40,4,128,1,float16,fp8,0,0.04155199974775314
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,float16,0,0.046122665206591286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,40,8,128,1,float16,fp8,0,0.04164800047874451
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,fp8,0,0.025242666403452556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,40,1,128,1,float16,float16,0,0.027221334477265675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,float16,0,0.03140799949566523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,40,40,128,1,float16,fp8,0,0.033359999457995095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,float16,0,0.027317332724730175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,40,2,128,1,float16,fp8,0,0.0252960001428922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,float16,0,0.027493332823117573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,float16,0,0.02740799884001414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,40,4,128,1,float16,fp8,0,0.026757332185904186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,40,8,128,1,float16,fp8,0,0.027221334477265675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,float16,0,0.01903466631968816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,40,1,128,1,float16,fp8,0,0.01956266661485036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,float16,0,0.019082666685183842
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,40,2,128,1,float16,fp8,0,0.01918399954835574
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,float16,0,0.021370666722456615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,40,40,128,1,float16,fp8,0,0.0222080002228419
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,float16,0,0.019023999571800232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,40,4,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,float16,0,0.019098666807015736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,40,8,128,1,float16,fp8,0,0.01907733331123988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,float16,0,0.015237333873907724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,40,1,128,1,float16,fp8,0,0.015141333142916361
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,float16,0,0.015205333630243937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,fp8,0,0.01773333301146825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,40,40,128,1,float16,float16,0,0.016938666502634685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,40,2,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,float16,0,0.015253332753976187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,float16,0,0.014826666563749313
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,40,8,128,1,float16,fp8,0,0.015184000134468079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,40,4,128,1,float16,fp8,0,0.015253332753976187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,float16,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,40,1,128,1,float16,fp8,0,0.013130666067202887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,40,2,128,1,float16,float16,0,0.013056000073750814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,float16,0,0.014933332800865173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,40,40,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,fp8,0,0.013093333691358566
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,40,4,128,1,float16,float16,0,0.013178666432698568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,40,8,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,float16,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,float16,0,0.012938667088747025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,40,1,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,fp8,0,0.015141333142916361
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,40,2,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,float16,0,0.01258133351802826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,40,40,128,1,float16,float16,0,0.013023999830087027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,float16,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,40,4,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,40,8,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,float16,0,0.255568007628123
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,fp8,0,0.20616533358891806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,40,1,128,1,float16,fp8,0,0.20360000928243002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,40,2,128,1,float16,float16,0,0.26586665709813434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,float16,0,0.279530664285024
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,40,4,128,1,float16,fp8,0,0.2235893408457438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,float16,0,0.28411734104156494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,40,8,128,1,float16,fp8,0,0.2315733234087626
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,float16,0,0.13371200362841287
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,float16,0,0.17338667313257852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,40,40,128,1,float16,fp8,0,0.15015467007954916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,40,1,128,1,float16,fp8,0,0.10665599505106609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,fp8,0,0.10761599739392598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,40,2,128,1,float16,float16,0,0.13294933239618936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,float16,0,0.1402720014254252
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,40,4,128,1,float16,fp8,0,0.11592533191045125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,float16,0,0.14441600441932678
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,40,8,128,1,float16,fp8,0,0.11761066317558289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,fp8,0,0.07689066727956136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,fp8,0,0.05820266902446747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,40,40,128,1,float16,float16,0,0.09363733728726704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,40,1,128,1,float16,float16,0,0.07267199953397115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,fp8,0,0.06025066475073496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,float16,0,0.07469333211580913
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,40,2,128,1,float16,float16,0,0.07277333239714305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,40,4,128,1,float16,fp8,0,0.0647626668214798
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,float16,0,0.07495466868082683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,40,8,128,1,float16,fp8,0,0.0639466643333435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,float16,0,0.041477332512537636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,float16,0,0.045909335215886436
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,40,1,128,1,float16,fp8,0,0.03545066714286804
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,float16,0,0.041493333876132965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,40,40,128,1,float16,fp8,0,0.044010668992996216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,40,2,128,1,float16,fp8,0,0.03555200000603994
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,float16,0,0.041749333341916404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,40,4,128,1,float16,fp8,0,0.03766400118668874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,float16,0,0.04248533149560293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,40,8,128,1,float16,fp8,0,0.03765333443880081
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,fp8,0,0.02325333406527837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,fp8,0,0.027280000348885853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,40,1,128,1,float16,float16,0,0.025829332570234936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,40,40,128,1,float16,float16,0,0.02737066646416982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,float16,0,0.025455998877684276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,40,2,128,1,float16,fp8,0,0.023434666295846302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,float16,0,0.026714667677879333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,40,4,128,1,float16,fp8,0,0.023232000569502514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,float16,0,0.02534399926662445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,40,8,128,1,float16,fp8,0,0.02345066765944163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,float16,0,0.019109333554903667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,40,1,128,1,float16,fp8,0,0.017423999806245167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,float16,0,0.019237333287795384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,40,2,128,1,float16,fp8,0,0.017407999684413273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,float16,0,0.021221332252025604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,40,40,128,1,float16,fp8,0,0.021274665991465252
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,float16,0,0.018965333700180054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,40,4,128,1,float16,fp8,0,0.018906666586796444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,float16,0,0.01952533299724261
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,40,8,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,fp8,0,0.018640000373125076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,float16,0,0.015360000232855478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,40,1,128,1,float16,fp8,0,0.015072000523408255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,40,40,128,1,float16,float16,0,0.017077332983414333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,float16,0,0.01504533365368843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,40,2,128,1,float16,fp8,0,0.015295999745527903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,float16,0,0.015184000134468079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,40,4,128,1,float16,fp8,0,0.015098666151364645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,float16,0,0.015311999867359797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,40,8,128,1,float16,fp8,0,0.01523200049996376
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,float16,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,40,1,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,float16,0,0.012826666235923767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,float16,0,0.012826666235923767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,40,40,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,40,2,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,float16,0,0.012944000462690989
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,fp8,0,0.013242666920026144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,40,8,128,1,float16,float16,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,40,4,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,float16,0,0.010965333630641302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,40,1,128,1,float16,fp8,0,0.011114666859308878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,float16,0,0.01119999960064888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,40,2,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,float16,0,0.013264000415802002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,float16,0,0.012800000607967377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,40,40,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,40,4,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,float16,0,0.012693333129088083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,40,8,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,0,0.021407999098300934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,0,0.019253333409627277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,0,0.025392000873883564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,0,0.02754133443037669
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,float16,0,0.06728533407052358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,40,40,128,1,float16,fp8,0,0.05049600203831991
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,0,0.015381333728631338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,0,0.015279999623696009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,0,0.01905599981546402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,0,0.01746133342385292
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,float16,0,0.034901333351929985
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,40,40,128,1,float16,fp8,0,0.03190933416287104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,0,0.013194666554530462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,0,0.014981333166360855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,0,0.01498666654030482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,float16,0,0.021317332983016968
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,40,40,128,1,float16,fp8,0,0.022272000710169475
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,0,0.011141333729028702
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,0,0.011535999675591787
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,40,40,128,1,float16,float16,0,0.017514667163292568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,0,0.011071999867757162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,0,0.010885333021481832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,0,0.011152000476916632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,float16,0,0.013983999689420065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,40,40,128,1,float16,fp8,0,0.015130666395028433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,0,0.009493333597977957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,0,0.010645333677530289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,0,0.011087999989589056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,fp8,0,0.013290667285521826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,40,40,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,0,0.01073066641887029
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,0,0.011098666737476984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,0,0.010911999891201654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,0,0.010960000256697336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,float16,0,0.01116266722480456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,40,40,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,0,0.01090666651725769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,0,0.010602666685978571
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,0,0.009797333429257074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,float16,0,0.011114666859308878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,40,40,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,0,0.010656000425418219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,0,0.010826667149861654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,0,0.009589333087205887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,0,0.01051733394463857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,fp8,0,10.931386311848959
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,fp8,0,12.074384053548178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,32,2,128,1,float16,float16,0,19.32259241739909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,32,1,128,1,float16,float16,0,19.34547170003255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,fp8,0,12.03659184773763
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,fp8,0,11.093748728434244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,32,4,128,1,float16,float16,0,20.188378651936848
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,32,8,128,1,float16,float16,0,19.032372792561848
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,float16,0,8.93231455485026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,32,32,128,1,float16,fp8,0,6.228842417399089
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,float16,0,7.268016179402669
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,32,1,128,1,float16,fp8,0,5.233530680338542
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,fp8,0,5.25437863667806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,32,2,128,1,float16,float16,0,10.192911783854166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,fp8,0,5.652352015177409
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,32,4,128,1,float16,float16,0,7.872490564982097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,float16,0,3.5206613540649414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,float16,0,7.889904022216797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,32,8,128,1,float16,fp8,0,6.11625607808431
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,32,32,128,1,float16,fp8,0,3.164095878601074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,fp8,0,2.7392800649007163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,fp8,0,2.721856117248535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,32,2,128,1,float16,float16,0,3.5501600901285806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,32,1,128,1,float16,float16,0,4.645610809326172
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,fp8,0,2.847584088643392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,32,4,128,1,float16,float16,0,3.2962026596069336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,fp8,0,2.9814399083455405
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,32,8,128,1,float16,float16,0,3.380437215169271
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,float16,0,1.6341066360473633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,float16,0,1.8132212956746419
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,32,32,128,1,float16,fp8,0,2.039658705393473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,32,1,128,1,float16,fp8,0,2.1339680353800454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,float16,0,1.6562719345092773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,32,2,128,1,float16,fp8,0,1.4224692980448406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,float16,0,1.713391939798991
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,32,4,128,1,float16,fp8,0,1.642090638478597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,float16,0,1.6913545926411946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,32,8,128,1,float16,fp8,0,1.481386661529541
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,fp8,0,6.050005594889323
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,float16,0,6.956053415934245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,32,1,128,1,float16,float16,0,10.765210469563803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,32,2,128,1,float16,fp8,0,6.577728271484375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,float16,0,9.308826446533203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,32,4,128,1,float16,fp8,0,6.593711853027344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,float16,0,10.622026443481445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,fp8,0,3.80132261912028
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,32,8,128,1,float16,fp8,0,6.93235715230306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,32,32,128,1,float16,float16,0,4.268682797749837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,float16,0,4.422570546468099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,32,1,128,1,float16,fp8,0,3.0020265579223633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,float16,0,3.6915305455525718
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,32,2,128,1,float16,fp8,0,3.102640151977539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,fp8,0,3.3672428131103516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,32,4,128,1,float16,float16,0,5.100864092508952
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,fp8,0,3.3636480967203775
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,32,8,128,1,float16,float16,0,3.8864212036132812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,float16,0,2.1070240338643393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,float16,0,1.8210612932840984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,32,32,128,1,float16,fp8,0,2.0502187410990396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,32,1,128,1,float16,fp8,0,1.585269292195638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,float16,0,1.8458186785380046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,32,2,128,1,float16,fp8,0,1.8176266352335613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,fp8,0,1.7066027323404949
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,32,4,128,1,float16,float16,0,1.959232012430827
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,float16,0,1.9737013181050618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,32,8,128,1,float16,fp8,0,1.7121973037719727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,fp8,0,1.0388480027516682
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,32,32,128,1,float16,float16,0,1.1340906620025635
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,fp8,0,0.8448586463928223
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,32,1,128,1,float16,float16,0,1.1100693543752034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,fp8,0,0.8598399957021078
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,32,2,128,1,float16,float16,0,1.001029332478841
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,float16,0,1.0486719608306885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,32,4,128,1,float16,fp8,0,0.9180640379587809
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,fp8,0,0.9265600045522054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,32,8,128,1,float16,float16,0,1.0653386910756428
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,fp8,0,4.2229814529418945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,32,1,128,1,float16,float16,0,4.856010754903157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,fp8,0,4.484453201293945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,32,2,128,1,float16,float16,0,5.329183896382649
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,float16,0,7.319733301798503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,32,4,128,1,float16,fp8,0,4.652469317118327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,fp8,0,4.708346684773763
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,32,8,128,1,float16,float16,0,8.39530118306478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,float16,0,2.5178186098734536
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,fp8,0,2.819674809773763
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,32,32,128,1,float16,float16,0,2.9766454696655273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,32,1,128,1,float16,fp8,0,2.2647573153177896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,float16,0,2.5694026947021484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,32,2,128,1,float16,fp8,0,2.2094613711039224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,fp8,0,2.3845067024230957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,32,4,128,1,float16,float16,0,3.6301867167154946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,fp8,0,2.383685270945231
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,32,8,128,1,float16,float16,0,2.7628533045450845
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,float16,0,1.5845866203308105
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,float16,0,1.3125813007354736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,32,32,128,1,float16,fp8,0,1.4917972882588704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,32,1,128,1,float16,fp8,0,1.4966452916463215
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,float16,0,1.3673760096232097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,32,2,128,1,float16,fp8,0,1.153119961420695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,float16,0,1.4330026308695476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,32,4,128,1,float16,fp8,0,1.2566400369008381
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,fp8,0,1.2531893253326416
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,32,8,128,1,float16,float16,0,1.472101370493571
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,float16,0,0.720298687616984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,float16,0,0.9035733540852865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,32,1,128,1,float16,fp8,0,0.6254133383433024
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,32,32,128,1,float16,fp8,0,0.7794079780578613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,float16,0,0.7291519641876221
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,32,2,128,1,float16,fp8,0,0.6304213205973307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,float16,0,0.7780640125274658
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,32,4,128,1,float16,fp8,0,0.691818634668986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,float16,0,0.7695733706156412
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,32,8,128,1,float16,fp8,0,0.6737013657887777
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,fp8,0,5.609930674235026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,fp8,0,5.621664047241211
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,32,2,128,1,float16,float16,0,8.819541295369467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,32,1,128,1,float16,float16,0,11.365962982177734
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,fp8,0,6.228848139444987
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,32,4,128,1,float16,float16,0,9.84601084391276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,fp8,0,6.448170979817708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,32,8,128,1,float16,float16,0,9.32474136352539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,float16,0,4.002127965291341
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,float16,0,3.321162541707357
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,32,32,128,1,float16,fp8,0,3.866117477416992
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,32,1,128,1,float16,fp8,0,3.1369120279947915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,float16,0,3.3274081548055015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,32,2,128,1,float16,fp8,0,2.95360533396403
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,float16,0,3.9383840560913086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,32,4,128,1,float16,fp8,0,3.148538589477539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,fp8,0,3.2659680048624673
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,32,8,128,1,float16,float16,0,4.586453437805176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,float16,0,2.137231985727946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,32,32,128,1,float16,fp8,0,1.9682559967041016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,float16,0,1.7137227058410645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,32,1,128,1,float16,fp8,0,1.4485920270284016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,float16,0,1.6946934064229329
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,32,2,128,1,float16,fp8,0,1.8785066604614258
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,float16,0,1.842517375946045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,32,4,128,1,float16,fp8,0,1.660805384318034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,float16,0,1.8606613477071126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,32,8,128,1,float16,fp8,0,1.6448639233907063
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,float16,0,1.079034646352132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,float16,0,0.8913760185241699
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,32,32,128,1,float16,fp8,0,1.0209226608276367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,32,1,128,1,float16,fp8,0,0.7676800092061361
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,float16,0,0.9055093129475912
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,32,2,128,1,float16,fp8,0,0.7879467010498047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,float16,0,0.9856800238291422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,32,4,128,1,float16,fp8,0,0.883232037226359
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,float16,0,0.9675146738688151
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,32,8,128,1,float16,fp8,0,0.8553706804911295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,float16,0,0.4971253474553426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,float16,0,0.5800053278605143
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,32,1,128,1,float16,fp8,0,0.4245973428090413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,32,32,128,1,float16,fp8,0,0.5523306528727213
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,float16,0,0.4997013409932454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,32,2,128,1,float16,fp8,0,0.4323626756668091
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,float16,0,0.5434026718139648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,32,4,128,1,float16,fp8,0,0.47193066279093426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,float16,0,0.5283466577529907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,32,8,128,1,float16,fp8,0,0.48412267367045086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,fp8,0,3.231151898701986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,32,1,128,1,float16,float16,0,4.947493235270183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,fp8,0,3.361392021179199
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,32,2,128,1,float16,float16,0,3.8583412170410156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,float16,0,5.136912027994792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,32,4,128,1,float16,fp8,0,3.719242731730143
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,float16,0,4.2369387944539385
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,32,8,128,1,float16,fp8,0,3.8139572143554688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,fp8,0,1.668922742207845
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,32,1,128,1,float16,float16,0,2.388901392618815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,fp8,0,2.4507733980814614
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,32,32,128,1,float16,float16,0,2.502373377482096
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,float16,0,1.9580000241597493
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,32,2,128,1,float16,fp8,0,2.2548906008402505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,fp8,0,1.9728693962097168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,32,4,128,1,float16,float16,0,2.2246400515238443
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,float16,0,2.188218593597412
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,32,8,128,1,float16,fp8,0,2.0019946098327637
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,float16,0,1.2982880274454753
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,32,32,128,1,float16,fp8,0,1.2649866739908855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,float16,0,1.0398240089416504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,32,1,128,1,float16,fp8,0,0.8717333475748698
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,fp8,0,0.8967999617258707
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,32,2,128,1,float16,float16,0,1.0299413204193115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,float16,0,1.1433013280232747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,32,4,128,1,float16,fp8,0,1.0253600279490154
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,fp8,0,1.024672031402588
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,32,8,128,1,float16,float16,0,1.139408032099406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,fp8,0,0.6660906473795573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,32,32,128,1,float16,float16,0,0.6885866324106852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,float16,0,0.5448053280512491
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,32,1,128,1,float16,fp8,0,0.4758026599884033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,float16,0,0.5617013374964396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,32,2,128,1,float16,fp8,0,0.4866986672083537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,float16,0,0.6208639939626058
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,32,4,128,1,float16,fp8,0,0.5562346776326498
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,float16,0,0.6100533405939738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,32,8,128,1,float16,fp8,0,0.5498559872309366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,float16,0,0.3018346627553304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,fp8,0,0.3651253382364909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,32,32,128,1,float16,float16,0,0.3790026505788167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,32,1,128,1,float16,fp8,0,0.2645973364512126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,float16,0,0.3088266650835673
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,32,2,128,1,float16,fp8,0,0.271999994913737
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,fp8,0,0.29577600955963135
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,32,4,128,1,float16,float16,0,0.343450665473938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,float16,0,0.3360746701558431
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,32,8,128,1,float16,fp8,0,0.30085333188374835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,fp8,0,3.081280072530111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,32,1,128,1,float16,float16,0,4.108778635660808
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,fp8,0,3.231792132059733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,32,2,128,1,float16,float16,0,3.64191468556722
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,float16,0,4.733045260111491
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,fp8,0,3.861226717631022
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,32,4,128,1,float16,fp8,0,3.7123146057128906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,32,8,128,1,float16,float16,0,4.271045366923015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,float16,0,1.7996266682942708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,32,1,128,1,float16,fp8,0,1.5750133196512859
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,fp8,0,2.6462559700012207
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,32,32,128,1,float16,float16,0,2.741562525431315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,fp8,0,1.6502772967020671
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,32,2,128,1,float16,float16,0,1.8915893236796062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,fp8,0,1.9071307182312012
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,32,4,128,1,float16,float16,0,2.1462987263997397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,fp8,0,2.0792160034179688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,32,8,128,1,float16,float16,0,2.172154744466146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,fp8,0,0.8704799811045328
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,32,1,128,1,float16,float16,0,0.9431573549906412
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,fp8,0,1.3508319854736328
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,32,32,128,1,float16,float16,0,1.3424639701843262
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,fp8,0,0.8677173455556234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,32,2,128,1,float16,float16,0,0.9973440170288086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,float16,0,1.113327980041504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,32,4,128,1,float16,fp8,0,1.0004586378733318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,float16,0,1.1229973634084065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,32,8,128,1,float16,fp8,0,1.0096960067749023
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,float16,0,0.505898674329122
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,float16,0,0.7002666791280111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,32,1,128,1,float16,fp8,0,0.43777068456013996
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,32,32,128,1,float16,fp8,0,0.7042559782663981
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,float16,0,0.5234986543655396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,32,2,128,1,float16,fp8,0,0.46566931406656903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,float16,0,0.6116319894790649
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,32,4,128,1,float16,fp8,0,0.5643786589304606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,float16,0,0.5844159921010336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,32,8,128,1,float16,fp8,0,0.5323733488718668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,float16,0,0.2811573346455892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,float16,0,0.3804159959157308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,float16,0,0.288480003674825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,32,32,128,1,float16,fp8,0,0.37544532616933185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,32,1,128,1,float16,fp8,0,0.23996265729268393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,32,2,128,1,float16,fp8,0,0.25470399856567383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,float16,0,0.3304160038630168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,32,4,128,1,float16,fp8,0,0.3032853404680888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,float16,0,0.31994666655858356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,32,8,128,1,float16,fp8,0,0.30636266867319745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,float16,0,0.15621333320935568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,32,1,128,1,float16,fp8,0,0.1411946713924408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,fp8,0,0.2095200022061666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,32,32,128,1,float16,float16,0,0.2233440081278483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,float16,0,0.16106667121251425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,32,2,128,1,float16,fp8,0,0.14697600404421488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,float16,0,0.17332265774408975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,32,4,128,1,float16,fp8,0,0.16299733519554138
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,float16,0,0.17704000075658163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,32,8,128,1,float16,fp8,0,0.16245333353678384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,float16,0,2.1252427101135254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,32,1,128,1,float16,fp8,0,1.866890589396159
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,fp8,0,2.014832019805908
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,32,2,128,1,float16,float16,0,2.233301321665446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,fp8,0,2.571349302927653
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,float16,0,2.6281439463297525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,32,4,128,1,float16,float16,0,3.173930803934733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,32,8,128,1,float16,fp8,0,2.439045270284017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,float16,0,1.1608853340148926
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,32,1,128,1,float16,fp8,0,0.9653653303782145
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,float16,0,1.7325545946757
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,32,32,128,1,float16,fp8,0,1.7699467341105144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,float16,0,1.1584320068359375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,32,2,128,1,float16,fp8,0,1.0201973120371501
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,float16,0,1.422554651896159
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,32,4,128,1,float16,fp8,0,1.2192266782124836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,fp8,0,1.2515947024027507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,32,8,128,1,float16,float16,0,1.3633492787679036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,fp8,0,0.5129119952519735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,32,1,128,1,float16,float16,0,0.5896533330281576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,float16,0,0.8916959762573242
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,32,32,128,1,float16,fp8,0,0.9092373053232828
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,float16,0,0.6085066795349121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,32,2,128,1,float16,fp8,0,0.544053316116333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,float16,0,0.7265173594156901
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,32,4,128,1,float16,fp8,0,0.6670239766438802
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,fp8,0,0.6911306381225586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,32,8,128,1,float16,float16,0,0.7367839813232422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,float16,0,0.4647039969762166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,fp8,0,0.2784480055173238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,32,1,128,1,float16,float16,0,0.31514134009679157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,32,32,128,1,float16,fp8,0,0.47667733828226727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,fp8,0,0.29797865947087604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,32,2,128,1,float16,float16,0,0.34227200349171955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,float16,0,0.38257598876953125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,32,4,128,1,float16,fp8,0,0.35598401228586835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,float16,0,0.38446935017903644
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,32,8,128,1,float16,fp8,0,0.36635732650756836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,fp8,0,0.15452800194422403
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,32,1,128,1,float16,float16,0,0.1743946671485901
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,float16,0,0.2518720030784607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,32,32,128,1,float16,fp8,0,0.2576906681060791
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,fp8,0,0.16030399998029074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,32,2,128,1,float16,float16,0,0.1813973387082418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,fp8,0,0.18734933932622275
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,32,4,128,1,float16,float16,0,0.21455466747283936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,fp8,0,0.19425066312154135
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,32,8,128,1,float16,float16,0,0.21089067061742148
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,float16,0,0.10215466221173604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,fp8,0,0.1431839962800344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,32,1,128,1,float16,fp8,0,0.09310932954152425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,32,32,128,1,float16,float16,0,0.15427733461062113
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,float16,0,0.10507200161616008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,32,2,128,1,float16,fp8,0,0.095360000928243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,float16,0,0.11371733744939168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,32,4,128,1,float16,fp8,0,0.10800000031789143
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,fp8,0,0.10827199618021648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,32,8,128,1,float16,float16,0,0.11437333623568217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,float16,0,2.1145013173421225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,32,1,128,1,float16,fp8,0,1.8850666681925456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,float16,0,2.252895991007487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,32,2,128,1,float16,fp8,0,2.0342186292012534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,float16,0,2.836981455485026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,32,4,128,1,float16,fp8,0,2.8180694580078125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,fp8,0,2.683354695638021
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,32,8,128,1,float16,float16,0,2.7981494267781577
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,float16,0,1.0873119831085205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,float16,0,1.8952693939208984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,32,32,128,1,float16,fp8,0,2.0274133682250977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,32,1,128,1,float16,fp8,0,0.9554133415222168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,float16,0,1.159589370091756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,32,2,128,1,float16,fp8,0,1.0437333583831787
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,float16,0,1.4571679433186848
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,32,4,128,1,float16,fp8,0,1.297327995300293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,fp8,0,1.4277386665344238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,32,8,128,1,float16,float16,0,1.524122714996338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,float16,0,0.9724053541819254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,float16,0,0.5726346572240194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,32,1,128,1,float16,fp8,0,0.4999946753184001
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,32,32,128,1,float16,fp8,0,1.0361920197804768
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,fp8,0,0.5410773356755575
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,32,2,128,1,float16,float16,0,0.6062719821929932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,float16,0,0.7635520299275717
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,32,4,128,1,float16,fp8,0,0.7549599806467692
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,float16,0,0.743658701578776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,32,8,128,1,float16,fp8,0,0.694106658299764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,float16,0,0.30694399277369183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,float16,0,0.5068960189819336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,32,1,128,1,float16,fp8,0,0.26984532674153644
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,float16,0,0.324346661567688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,32,32,128,1,float16,fp8,0,0.5374293327331543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,32,2,128,1,float16,fp8,0,0.2921866575876872
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,float16,0,0.41125865777333576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,32,4,128,1,float16,fp8,0,0.3938346703847249
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,float16,0,0.4129546483357747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,32,8,128,1,float16,fp8,0,0.36820268630981445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,float16,0,0.1716053287188212
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,float16,0,0.27798400322596234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,32,1,128,1,float16,fp8,0,0.1465226709842682
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,32,32,128,1,float16,fp8,0,0.28303466240564984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,float16,0,0.18240533272425333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,float16,0,0.2216320037841797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,32,2,128,1,float16,fp8,0,0.15890133380889893
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,32,4,128,1,float16,fp8,0,0.20516266425450644
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,float16,0,0.22268799940745035
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,32,8,128,1,float16,fp8,0,0.21481066942214966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,float16,0,0.0936906635761261
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,32,1,128,1,float16,fp8,0,0.08468266328175862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,float16,0,0.09724266330401103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,fp8,0,0.15448000033696493
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,32,32,128,1,float16,float16,0,0.1488800048828125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,32,2,128,1,float16,fp8,0,0.0899839997291565
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,float16,0,0.10985599954922994
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,32,4,128,1,float16,fp8,0,0.1055413285891215
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,float16,0,0.11599999666213989
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,32,8,128,1,float16,fp8,0,0.10608533024787903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,fp8,0,0.055770665407180786
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,32,1,128,1,float16,float16,0,0.060778667529424034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,fp8,0,0.08088000118732452
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,float16,0,0.06250666578610738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,32,2,128,1,float16,fp8,0,0.05819733440876007
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,32,32,128,1,float16,float16,0,0.09019733468691508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,fp8,0,0.06598400076230367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,32,4,128,1,float16,float16,0,0.06776000062624614
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,fp8,0,0.06638933221499126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,32,8,128,1,float16,float16,0,0.06750933329264323
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,fp8,0,1.1879359881083171
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,32,1,128,1,float16,float16,0,1.37172269821167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,float16,0,1.44485870997111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,32,2,128,1,float16,fp8,0,1.3087573051452637
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,float16,0,1.9351414044698079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,32,4,128,1,float16,fp8,0,1.689194679260254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,float16,0,1.8276586532592773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,32,8,128,1,float16,fp8,0,1.7913066546122234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,float16,0,0.6997493108113607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,32,1,128,1,float16,fp8,0,0.6147146622339884
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,float16,0,1.2998507022857666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,32,32,128,1,float16,fp8,0,1.4195733070373535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,float16,0,0.7588586807250977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,32,2,128,1,float16,fp8,0,0.6801599661509196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,float16,0,0.9594559669494629
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,32,4,128,1,float16,fp8,0,0.8681706587473551
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,float16,0,0.9512693087259928
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,32,8,128,1,float16,fp8,0,0.9188480377197266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,fp8,0,0.7262187004089355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,32,32,128,1,float16,float16,0,0.6683839956919352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,float16,0,0.37170668443044025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,32,1,128,1,float16,fp8,0,0.33057065804799396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,float16,0,0.4053013324737549
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,32,2,128,1,float16,fp8,0,0.35901331901550293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,fp8,0,0.48555731773376465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,32,4,128,1,float16,float16,0,0.5147466659545898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,fp8,0,0.46641600131988525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,32,8,128,1,float16,float16,0,0.49106132984161377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,fp8,0,0.3779146671295166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,32,32,128,1,float16,float16,0,0.3545813163121541
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,float16,0,0.20082666476567587
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,32,1,128,1,float16,fp8,0,0.17705066998799643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,float16,0,0.21904534101486206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,float16,0,0.27268266677856445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,32,2,128,1,float16,fp8,0,0.1973759929339091
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,32,4,128,1,float16,fp8,0,0.26375999053319293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,float16,0,0.2707146604855855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,32,8,128,1,float16,fp8,0,0.26480533679326373
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,float16,0,0.10940266648928325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,fp8,0,0.20070934295654297
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,32,32,128,1,float16,float16,0,0.18778133392333984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,32,1,128,1,float16,fp8,0,0.0972106655438741
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,float16,0,0.11851200461387634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,float16,0,0.14739200472831726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,32,2,128,1,float16,fp8,0,0.10497599840164185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,32,4,128,1,float16,fp8,0,0.12796266873677573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,float16,0,0.15089600284894308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,32,8,128,1,float16,fp8,0,0.1360373298327128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,float16,0,0.06524799764156342
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,32,1,128,1,float16,fp8,0,0.060133333007494606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,float16,0,0.11668800314267476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,32,32,128,1,float16,fp8,0,0.10945066809654236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,float16,0,0.06702400247255962
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,32,2,128,1,float16,fp8,0,0.06440000236034393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,float16,0,0.0758186678091685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,32,4,128,1,float16,fp8,0,0.0767626663049062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,fp8,0,0.07702399790287018
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,32,8,128,1,float16,float16,0,0.07673599819342296
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,float16,0,0.043663998444875084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,float16,0,0.055344000458717346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,32,1,128,1,float16,fp8,0,0.039664000272750854
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,32,32,128,1,float16,fp8,0,0.06046399970849355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,float16,0,0.04459733267625173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,32,2,128,1,float16,fp8,0,0.04162666698296865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,float16,0,0.04971733192602793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,32,4,128,1,float16,fp8,0,0.04799999793370565
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,float16,0,0.049039999643961586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,32,8,128,1,float16,fp8,0,0.049466664592425026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,float16,0,1.4256960550944011
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,32,1,128,1,float16,fp8,0,1.2560906410217285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,float16,0,1.567226727803548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,32,2,128,1,float16,fp8,0,1.4251680374145508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,float16,0,2.156485398610433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,float16,0,2.1477386156717935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,32,4,128,1,float16,fp8,0,1.8592586517333984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,32,8,128,1,float16,fp8,0,2.0900373458862305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,float16,0,0.7494826316833496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,32,1,128,1,float16,fp8,0,0.6495946645736694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,fp8,0,1.719696044921875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,32,32,128,1,float16,float16,0,1.5397599538167317
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,float16,0,0.8241120179494222
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,32,2,128,1,float16,fp8,0,0.7257226308186849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,float16,0,1.1071253617604573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,32,4,128,1,float16,fp8,0,1.1291946570078533
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,float16,0,1.1625920136769612
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,32,8,128,1,float16,fp8,0,1.0502933661142986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,float16,0,0.39079999923706055
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,32,1,128,1,float16,fp8,0,0.3434133529663086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,float16,0,0.7881813049316406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,float16,0,0.4336746533711751
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,32,32,128,1,float16,fp8,0,0.8736373583475748
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,32,2,128,1,float16,fp8,0,0.3867093324661255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,float16,0,0.5892053445180258
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,float16,0,0.5543466806411743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,32,4,128,1,float16,fp8,0,0.5890933275222778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,float16,0,0.20829866329828897
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,32,8,128,1,float16,fp8,0,0.5719253222147623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,32,1,128,1,float16,fp8,0,0.18148799737294516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,fp8,0,0.44971732298533124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,32,32,128,1,float16,float16,0,0.4086666504542033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,float16,0,0.2244373361269633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,32,2,128,1,float16,fp8,0,0.2034133275349935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,float16,0,0.3033546606699626
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,32,4,128,1,float16,fp8,0,0.3113119999567668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,float16,0,0.29047467311223346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,32,8,128,1,float16,fp8,0,0.28014934062957764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,float16,0,0.11437867085138957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,32,1,128,1,float16,fp8,0,0.09949866930643718
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,float16,0,0.12195199728012085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,float16,0,0.21670933564503989
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,32,2,128,1,float16,fp8,0,0.1116373340288798
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,32,32,128,1,float16,fp8,0,0.23775466283162436
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,float16,0,0.1612320045630137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,fp8,0,0.166703999042511
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,32,4,128,1,float16,fp8,0,0.15506133437156677
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,32,8,128,1,float16,float16,0,0.1686026652654012
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,float16,0,0.0631573349237442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,32,1,128,1,float16,fp8,0,0.05827199916044871
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,float16,0,0.0662773350874583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,32,2,128,1,float16,fp8,0,0.06295466423034668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,fp8,0,0.12827733159065247
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,32,32,128,1,float16,float16,0,0.11728533109029134
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,fp8,0,0.07853333155314128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,32,4,128,1,float16,float16,0,0.07775466640790303
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,fp8,0,0.07935466865698497
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,32,8,128,1,float16,float16,0,0.08379200100898743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,float16,0,0.04085333396991094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,32,1,128,1,float16,fp8,0,0.03779733429352442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,float16,0,0.04164800047874451
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,32,2,128,1,float16,fp8,0,0.04002666721741358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,fp8,0,0.06403733293215434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,32,32,128,1,float16,float16,0,0.06251733501752217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,float16,0,0.04752000172932943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,32,4,128,1,float16,fp8,0,0.047744000951449074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,float16,0,0.04770666857560476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,32,8,128,1,float16,fp8,0,0.04789333542188009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,float16,0,0.025386666258176167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,32,1,128,1,float16,fp8,0,0.02521066615978877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,fp8,0,0.03772799919048945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,32,32,128,1,float16,float16,0,0.033530667424201965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,float16,0,0.02716800073782603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,32,2,128,1,float16,fp8,0,0.027114666998386383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,float16,0,0.029296000798543293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,32,4,128,1,float16,fp8,0,0.031189332405726116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,float16,0,0.0296426663796107
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,32,8,128,1,float16,fp8,0,0.03120533376932144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,float16,0,1.1094826857248943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,32,1,128,1,float16,fp8,0,0.9644426504770914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,float16,0,1.2326613267262776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,32,2,128,1,float16,fp8,0,1.1241386731465657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,float16,0,1.876922607421875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,32,4,128,1,float16,fp8,0,1.8965706825256348
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,fp8,0,1.56113068262736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,32,32,128,1,float16,float16,0,1.3685973485310872
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,fp8,0,1.757765293121338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,32,8,128,1,float16,float16,0,1.8152532577514648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,float16,0,0.5797760089238485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,32,1,128,1,float16,fp8,0,0.49961066246032715
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,float16,0,0.6486186583836874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,32,2,128,1,float16,fp8,0,0.5808320045471191
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,float16,0,0.9846453666687012
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,32,4,128,1,float16,fp8,0,0.9690453211466471
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,float16,0,0.9785760243733724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,32,8,128,1,float16,fp8,0,0.9645973046620687
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,float16,0,0.6971039772033691
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,32,32,128,1,float16,fp8,0,0.7933440208435059
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,float16,0,0.3017973303794861
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,32,1,128,1,float16,fp8,0,0.25939732789993286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,float16,0,0.3434773286183675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,32,2,128,1,float16,fp8,0,0.309114674727122
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,float16,0,0.48497601350148517
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,32,4,128,1,float16,fp8,0,0.456005334854126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,fp8,0,0.5058346589406332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,32,8,128,1,float16,float16,0,0.5080159902572632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,float16,0,0.360640009244283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,32,32,128,1,float16,fp8,0,0.406277338663737
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,float16,0,0.16220266620318094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,32,1,128,1,float16,fp8,0,0.13909332950909933
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,float16,0,0.18518932660420737
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,32,2,128,1,float16,fp8,0,0.1592693328857422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,fp8,0,0.264847993850708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,32,4,128,1,float16,float16,0,0.2622720003128052
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,float16,0,0.24877333641052246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,32,8,128,1,float16,fp8,0,0.2433333396911621
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,float16,0,0.090037335952123
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,float16,0,0.1890933314959208
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,32,32,128,1,float16,fp8,0,0.2121866742769877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,32,1,128,1,float16,fp8,0,0.07707733412583669
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,float16,0,0.09850666920344035
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,float16,0,0.13367467125256857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,32,2,128,1,float16,fp8,0,0.08919466535250346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,32,4,128,1,float16,fp8,0,0.12897066275278726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,float16,0,0.13333866993586221
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,32,8,128,1,float16,fp8,0,0.1301706631978353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,float16,0,0.05009066561857859
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,fp8,0,0.11356799801190694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,32,32,128,1,float16,float16,0,0.10177066922187805
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,32,1,128,1,float16,fp8,0,0.045423999428749084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,float16,0,0.053413331508636475
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,32,2,128,1,float16,fp8,0,0.049456000328063965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,fp8,0,0.06496533254782359
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,32,4,128,1,float16,float16,0,0.06394133468468984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,float16,0,0.06684266527493794
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,32,8,128,1,float16,fp8,0,0.06644799808661143
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,float16,0,0.0315786674618721
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,fp8,0,0.05513066550095876
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,32,1,128,1,float16,fp8,0,0.029301332930723827
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,float16,0,0.03312533348798752
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,32,32,128,1,float16,float16,0,0.05338666836420695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,32,2,128,1,float16,fp8,0,0.03142400085926056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,float16,0,0.03752533346414566
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,32,4,128,1,float16,fp8,0,0.03956266740957896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,float16,0,0.03923733284076055
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,32,8,128,1,float16,fp8,0,0.03948266555865606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,float16,0,0.020128000527620316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,32,1,128,1,float16,fp8,0,0.01933866615096728
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,float16,0,0.020986666282018025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,fp8,0,0.03286399940649668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,32,32,128,1,float16,float16,0,0.0284853329261144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,32,2,128,1,float16,fp8,0,0.02109333376089732
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,float16,0,0.023520000278949738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,32,4,128,1,float16,fp8,0,0.025279998779296875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,float16,0,0.02346133440732956
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,32,8,128,1,float16,fp8,0,0.025285333395004272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,fp8,0,0.018911999960740406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,32,1,128,1,float16,float16,0,0.019071999937295914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,float16,0,0.021727999051411945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,float16,0,0.01912533367673556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,32,32,128,1,float16,fp8,0,0.025562666356563568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,32,2,128,1,float16,fp8,0,0.01924266666173935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,fp8,0,0.020527999848127365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,32,4,128,1,float16,float16,0,0.020474666108687718
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,float16,0,0.020960000654061634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,32,8,128,1,float16,fp8,0,0.021253332495689392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,float16,0,0.4449546734491984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,32,1,128,1,float16,fp8,0,0.4060693184534709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,float16,0,0.5131359895070394
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,32,2,128,1,float16,fp8,0,0.48649601141611737
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,float16,0,0.8341066837310791
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,32,4,128,1,float16,fp8,0,0.8759306271870931
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,float16,0,0.76473601659139
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,float16,0,0.6340693235397339
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,32,32,128,1,float16,fp8,0,0.7440319856007894
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,32,8,128,1,float16,fp8,0,0.8718132972717285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,fp8,0,0.21569067239761353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,32,1,128,1,float16,float16,0,0.2334346572558085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,float16,0,0.26993600527445477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,32,2,128,1,float16,fp8,0,0.2574453353881836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,float16,0,0.4201226631800334
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,32,4,128,1,float16,fp8,0,0.4577706654866536
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,float16,0,0.39971200625101727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,32,8,128,1,float16,fp8,0,0.4122613271077474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,float16,0,0.3270240028699239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,float16,0,0.12610133488972983
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,32,1,128,1,float16,fp8,0,0.11362133423487346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,32,32,128,1,float16,fp8,0,0.37998934586842853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,fp8,0,0.13500799735387167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,32,2,128,1,float16,float16,0,0.15108799934387207
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,float16,0,0.2292799949645996
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,32,4,128,1,float16,fp8,0,0.2390186587969462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,fp8,0,0.217466672261556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,32,8,128,1,float16,float16,0,0.21500267585118613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,fp8,0,0.19699199994405112
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,32,32,128,1,float16,float16,0,0.170741339524587
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,float16,0,0.07085866729418437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,32,1,128,1,float16,fp8,0,0.06165866553783417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,float16,0,0.07775466640790303
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,32,2,128,1,float16,fp8,0,0.07179200152556102
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,float16,0,0.1172320048014323
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,32,4,128,1,float16,fp8,0,0.1132426659266154
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,float16,0,0.12173333764076233
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,32,8,128,1,float16,fp8,0,0.1267466644446055
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,float16,0,0.03931200007597605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,32,1,128,1,float16,fp8,0,0.03732266773780187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,fp8,0,0.1074079970518748
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,32,32,128,1,float16,float16,0,0.09483200311660767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,float16,0,0.04200533529122671
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,32,2,128,1,float16,fp8,0,0.043040002385775246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,float16,0,0.0539680023988088
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,32,4,128,1,float16,fp8,0,0.05796800057093302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,float16,0,0.06026133398214976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,32,8,128,1,float16,fp8,0,0.05898666878541311
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,float16,0,0.02514133354028066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,fp8,0,0.0498986691236496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,32,1,128,1,float16,fp8,0,0.023370665808518726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,float16,0,0.025957333544890087
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,32,32,128,1,float16,float16,0,0.04598933458328247
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,32,2,128,1,float16,fp8,0,0.02535466601451238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,float16,0,0.03136533250411352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,32,4,128,1,float16,fp8,0,0.033530667424201965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,float16,0,0.031632001201311745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,32,8,128,1,float16,fp8,0,0.033973333736260734
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,fp8,0,0.016901332885026932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,32,1,128,1,float16,float16,0,0.017050666113694508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,fp8,0,0.029530666768550873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,float16,0,0.01743999992807706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,32,32,128,1,float16,float16,0,0.023541333774725597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,32,2,128,1,float16,fp8,0,0.01897066707412402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,float16,0,0.020954666038354237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,32,4,128,1,float16,fp8,0,0.021429332594076794
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,float16,0,0.02126399924357732
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,32,8,128,1,float16,fp8,0,0.023056000471115112
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,float16,0,0.014981333166360855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,32,1,128,1,float16,fp8,0,0.015178666760524115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,float16,0,0.01522133375207583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,fp8,0,0.023285334308942158
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,32,2,128,1,float16,fp8,0,0.01524266724785169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,32,32,128,1,float16,float16,0,0.019018666197856266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,float16,0,0.017077332983414333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,32,4,128,1,float16,fp8,0,0.018981333822011948
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,float16,0,0.01766933376590411
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,32,8,128,1,float16,fp8,0,0.017717332889636356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,float16,0,0.01525866612792015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,fp8,0,0.018709332992633183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,32,1,128,1,float16,fp8,0,0.016917333006858826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,32,32,128,1,float16,float16,0,0.017136000096797943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,float16,0,0.015301333119471868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,32,2,128,1,float16,fp8,0,0.01522133375207583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,float16,0,0.014991999914248785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,32,4,128,1,float16,fp8,0,0.01522133375207583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,float16,0,0.015642666568358738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,32,8,128,1,float16,fp8,0,0.016810666769742966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,float16,0,0.27607999245325726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,32,1,128,1,float16,fp8,0,0.24785067637761435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,float16,0,0.311082661151886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,32,2,128,1,float16,fp8,0,0.28959999481836957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,float16,0,0.45797332127888996
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,32,4,128,1,float16,fp8,0,0.42532265186309814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,fp8,0,0.43864532311757404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,32,8,128,1,float16,float16,0,0.44059733549753827
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,fp8,0,0.39825598398844403
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,32,32,128,1,float16,float16,0,0.34935998916625977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,float16,0,0.14607999722162882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,32,1,128,1,float16,fp8,0,0.13272000352541605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,float16,0,0.16263999541600546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,fp8,0,0.23715200026830038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,32,4,128,1,float16,float16,0,0.2495946685473124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,32,2,128,1,float16,fp8,0,0.15239466230074564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,float16,0,0.19511467218399048
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,float16,0,0.22890132665634155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,32,8,128,1,float16,fp8,0,0.23669866720835367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,fp8,0,0.07236266632874806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,32,1,128,1,float16,float16,0,0.07975466549396515
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,32,32,128,1,float16,fp8,0,0.20669867595036825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,float16,0,0.08894399801890056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,32,2,128,1,float16,fp8,0,0.08294933537642162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,float16,0,0.12450666228930156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,32,4,128,1,float16,fp8,0,0.12486400206883748
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,float16,0,0.12111999591191609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,32,8,128,1,float16,fp8,0,0.1367573340733846
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,float16,0,0.1074666678905487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,float16,0,0.04375466704368591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,32,32,128,1,float16,fp8,0,0.109333336353302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,32,1,128,1,float16,fp8,0,0.03955200066169103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,fp8,0,0.04577066500981649
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,float16,0,0.0580320010582606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,32,2,128,1,float16,float16,0,0.04670399924119314
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,32,4,128,1,float16,fp8,0,0.05989866455396017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,float16,0,0.06609066824118297
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,32,8,128,1,float16,fp8,0,0.06228266656398773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,float16,0,0.02831999957561493
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,float16,0,0.05841066439946493
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,32,32,128,1,float16,fp8,0,0.05201066533724467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,32,1,128,1,float16,fp8,0,0.025514667232831318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,float16,0,0.029626667499542236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,float16,0,0.03362133353948593
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,32,2,128,1,float16,fp8,0,0.028602667152881622
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,32,4,128,1,float16,fp8,0,0.03550933301448822
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,float16,0,0.03534399966398875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,32,8,128,1,float16,fp8,0,0.035786665976047516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,float16,0,0.018863999595244724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,float16,0,0.01924266666173935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,float16,0,0.025573333104451496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,32,1,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,32,32,128,1,float16,fp8,0,0.031258667508761086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,32,2,128,1,float16,fp8,0,0.01933866615096728
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,fp8,0,0.023130667706330616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,32,4,128,1,float16,float16,0,0.021536000072956085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,float16,0,0.021216000119845074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,32,8,128,1,float16,fp8,0,0.02325333406527837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,32,1,128,1,float16,fp8,0,0.013242666920026144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,fp8,0,0.021045332153638203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,float16,0,0.01314666618903478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,32,32,128,1,float16,float16,0,0.017029333859682083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,32,2,128,1,float16,fp8,0,0.013557333499193192
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,float16,0,0.014997333288192749
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,32,4,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,float16,0,0.015077333897352219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,32,8,128,1,float16,fp8,0,0.01703466723362605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,fp8,0,0.013157332936922709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,32,1,128,1,float16,float16,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,float16,0,0.014896000425020853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,32,32,128,1,float16,fp8,0,0.015216000378131866
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,float16,0,0.013253333667914072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,32,2,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,32,4,128,1,float16,fp8,0,0.013434667140245438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,float16,0,0.013162666310866674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,32,8,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,float16,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,32,1,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,32,2,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,float16,0,0.013274667163689932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,32,32,128,1,float16,fp8,0,0.014938666174809137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,float16,0,0.013248000293970108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,32,4,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,32,8,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,fp8,0,0.20037333170572916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,32,1,128,1,float16,float16,0,0.2338506579399109
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,fp8,0,0.22099733352661133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,32,2,128,1,float16,float16,0,0.2561440070470174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,float16,0,0.3302453358968099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,32,4,128,1,float16,fp8,0,0.32708267370859784
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,float16,0,0.31795734167099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,32,8,128,1,float16,fp8,0,0.29918932914733887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,fp8,0,0.24073066314061484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,32,32,128,1,float16,float16,0,0.23332800467809042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,fp8,0,0.10750400026639302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,32,1,128,1,float16,float16,0,0.12495467066764832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,float16,0,0.1711733341217041
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,32,4,128,1,float16,fp8,0,0.161189337571462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,float16,0,0.1348373293876648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,32,2,128,1,float16,fp8,0,0.11820266644159953
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,float16,0,0.12156800429026286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,32,32,128,1,float16,fp8,0,0.12591999769210815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,float16,0,0.1788853406906128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,32,8,128,1,float16,fp8,0,0.1585760017236074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,float16,0,0.0697386662165324
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,fp8,0,0.057861333092053734
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,32,1,128,1,float16,float16,0,0.06619200110435486
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,32,2,128,1,float16,fp8,0,0.06243200103441874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,fp8,0,0.07702399790287018
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,32,4,128,1,float16,float16,0,0.08117333551247914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,float16,0,0.09019200007120769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,32,8,128,1,float16,fp8,0,0.08270933230717976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,float16,0,0.03881066789229711
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,float16,0,0.06630399823188782
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,32,32,128,1,float16,fp8,0,0.0602400004863739
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,32,1,128,1,float16,fp8,0,0.03376533339420954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,float16,0,0.045797333121299744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,fp8,0,0.03583466758330663
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,32,2,128,1,float16,float16,0,0.039706667264302574
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,32,4,128,1,float16,fp8,0,0.043882668018341064
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,fp8,0,0.04383466641108195
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,32,8,128,1,float16,float16,0,0.045781334241231285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,float16,0,0.02568000058333079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,float16,0,0.031471999982992806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,32,32,128,1,float16,fp8,0,0.035546667873859406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,32,1,128,1,float16,fp8,0,0.023200000325838726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,float16,0,0.025461333493391674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,32,2,128,1,float16,fp8,0,0.023258666197458904
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,float16,0,0.027615999182065327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,32,4,128,1,float16,fp8,0,0.027621333797772724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,fp8,0,0.02788266787926356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,32,8,128,1,float16,float16,0,0.027349332968393963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,float16,0,0.016917333006858826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,float16,0,0.02109866589307785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,32,1,128,1,float16,fp8,0,0.017055999487638474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,32,32,128,1,float16,fp8,0,0.02342933416366577
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,float16,0,0.01674666628241539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,float16,0,0.018917333334684372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,32,4,128,1,float16,fp8,0,0.018837332725524902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,32,2,128,1,float16,fp8,0,0.01728533332546552
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,float16,0,0.018874666343132656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,32,8,128,1,float16,fp8,0,0.01893866683046023
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,32,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,float16,0,0.01482133318980535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,float16,0,0.013178666432698568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,32,32,128,1,float16,fp8,0,0.014853333433469137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,32,2,128,1,float16,fp8,0,0.013306666165590286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,fp8,0,0.013157332936922709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,32,4,128,1,float16,float16,0,0.013221333424250284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,float16,0,0.013061333447694778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,32,8,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,float16,0,0.012752000242471695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,32,1,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,float16,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,fp8,0,0.014906667172908783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,32,2,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,32,32,128,1,float16,float16,0,0.013130666067202887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,float16,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,32,4,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,32,8,128,1,float16,float16,0,0.01322666679819425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,float16,0,0.012714666624863943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,32,1,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,float16,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,32,2,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,float16,0,0.012986666212479273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,32,32,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,32,4,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,float16,0,0.012917333592971167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,32,8,128,1,float16,fp8,0,0.01309866706530253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,fp8,0,0.17914666732152304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,32,1,128,1,float16,float16,0,0.2186560034751892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,float16,0,0.22366400559743246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,32,2,128,1,float16,fp8,0,0.1898826758066813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,float16,0,0.26823999484380084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,32,4,128,1,float16,fp8,0,0.2323360045750936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,float16,0,0.27033066749572754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,32,8,128,1,float16,fp8,0,0.2432639996210734
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,float16,0,0.11155733466148376
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,32,1,128,1,float16,fp8,0,0.09317333499590556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,float16,0,0.1734559933344523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,32,32,128,1,float16,fp8,0,0.16099199652671814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,float16,0,0.11779733498891194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,32,2,128,1,float16,fp8,0,0.0972213347752889
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,float16,0,0.13621866703033447
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,32,4,128,1,float16,fp8,0,0.11160533626874287
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,float16,0,0.13682132959365845
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,32,8,128,1,float16,fp8,0,0.11413333813349406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,fp8,0,0.08246933420499165
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,32,32,128,1,float16,float16,0,0.09296533465385437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,float16,0,0.06011199951171875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,32,1,128,1,float16,fp8,0,0.05219733218352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,float16,0,0.06298133234182994
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,32,2,128,1,float16,fp8,0,0.05393599967161814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,float16,0,0.06841599941253662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,32,4,128,1,float16,fp8,0,0.0621013343334198
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,float16,0,0.0684799998998642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,32,8,128,1,float16,fp8,0,0.06211199859778086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,float16,0,0.03568533311287562
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,float16,0,0.043712000052134194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,32,1,128,1,float16,fp8,0,0.031498665610949196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,32,32,128,1,float16,fp8,0,0.04393066465854645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,float16,0,0.03754133234421412
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,32,2,128,1,float16,fp8,0,0.033530667424201965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,float16,0,0.03941333293914795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,32,4,128,1,float16,fp8,0,0.03583999971548716
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,float16,0,0.03982399900754293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,32,8,128,1,float16,fp8,0,0.03623999903599421
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,float16,0,0.023418667415777843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,float16,0,0.025626666843891144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,32,1,128,1,float16,fp8,0,0.021157334248224895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,32,32,128,1,float16,fp8,0,0.028416000306606293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,float16,0,0.023418667415777843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,32,2,128,1,float16,fp8,0,0.021322667598724365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,float16,0,0.025274666647116344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,32,4,128,1,float16,fp8,0,0.02332266668478648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,float16,0,0.02533866713444392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,32,8,128,1,float16,fp8,0,0.0235359991590182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,float16,0,0.016986666868130367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,float16,0,0.017008000363906223
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,32,32,128,1,float16,fp8,0,0.019061333189407986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,32,1,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,float16,0,0.018805333723624546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,32,2,128,1,float16,fp8,0,0.015279999623696009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,float16,0,0.017103999853134155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,32,4,128,1,float16,fp8,0,0.016042667130629223
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,float16,0,0.01711999997496605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,32,8,128,1,float16,fp8,0,0.017045332739750545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,float16,0,0.013440000514189402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,float16,0,0.015669333438078564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,32,1,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,float16,0,0.013258667041858038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,32,32,128,1,float16,fp8,0,0.014938666174809137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,32,2,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,float16,0,0.013503999759753546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,32,4,128,1,float16,fp8,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,float16,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,32,8,128,1,float16,fp8,0,0.013317332913478216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,float16,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,32,1,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,float16,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,32,32,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,float16,0,0.012954667210578918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,32,2,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,float16,0,0.012762666990359625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,32,4,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,32,8,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,float16,0,0.011733333269755045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,32,1,128,1,float16,fp8,0,0.011274666835864386
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,fp8,0,0.013306666165590286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,32,2,128,1,float16,float16,0,0.011930666863918304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,32,32,128,1,float16,float16,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,float16,0,0.012154666086037954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,32,4,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,32,8,128,1,float16,float16,0,0.012602667013804117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,fp8,0,0.16288000345230103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,32,1,128,1,float16,float16,0,0.20829866329828897
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,float16,0,0.2165600061416626
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,32,2,128,1,float16,fp8,0,0.16716265678405762
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,float16,0,0.22955199082692465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,32,4,128,1,float16,fp8,0,0.18832000096638998
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,float16,0,0.23057599862416586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,32,8,128,1,float16,fp8,0,0.1919999917348226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,float16,0,0.1388106644153595
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,32,32,128,1,float16,fp8,0,0.12337600191434224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,float16,0,0.11060800155003865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,32,1,128,1,float16,fp8,0,0.08684800068537395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,fp8,0,0.08933333555857341
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,32,2,128,1,float16,float16,0,0.10849600036938985
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,float16,0,0.11735999584197998
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,32,4,128,1,float16,fp8,0,0.09738666812578838
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,float16,0,0.1183093289534251
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,32,8,128,1,float16,fp8,0,0.09723200400670369
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,float16,0,0.07670400043328603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,float16,0,0.060378665725390114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,32,1,128,1,float16,fp8,0,0.04809600114822388
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,float16,0,0.060880000392595925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,32,32,128,1,float16,fp8,0,0.06195199986298879
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,32,2,128,1,float16,fp8,0,0.05171733101209005
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,float16,0,0.06233066817124685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,32,4,128,1,float16,fp8,0,0.054287999868392944
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,float16,0,0.06262399752934773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,32,8,128,1,float16,fp8,0,0.0558240016301473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,float16,0,0.03772266705830892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,float16,0,0.03568000098069509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,32,1,128,1,float16,fp8,0,0.03133866687615713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,32,32,128,1,float16,fp8,0,0.03750933210055033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,fp8,0,0.029440000653266907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,32,2,128,1,float16,float16,0,0.03550933301448822
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,float16,0,0.03743999948104223
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,32,4,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,float16,0,0.03581333408753077
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,32,8,128,1,float16,fp8,0,0.03153600047032038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,float16,0,0.02458133300145467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,float16,0,0.023269332945346832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,32,1,128,1,float16,fp8,0,0.021104000508785248
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,32,32,128,1,float16,fp8,0,0.023567999402681988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,float16,0,0.023408000667889912
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,32,2,128,1,float16,fp8,0,0.021066665649414062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,float16,0,0.02327999969323476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,32,4,128,1,float16,fp8,0,0.02145066608985265
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,float16,0,0.023258666197458904
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,32,8,128,1,float16,fp8,0,0.021146667500336964
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,float16,0,0.017029333859682083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,32,1,128,1,float16,fp8,0,0.014975999792416891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,float16,0,0.01699200024207433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,32,32,128,1,float16,fp8,0,0.01736533393462499
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,float16,0,0.016906666258970898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,32,2,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,float16,0,0.016986666868130367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,32,4,128,1,float16,fp8,0,0.015274666249752045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,fp8,0,0.014981333166360855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,32,8,128,1,float16,float16,0,0.015813333292802174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,float16,0,0.014896000425020853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,fp8,0,0.014746667196353277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,32,32,128,1,float16,float16,0,0.013397333522637686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,32,1,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,float16,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,32,2,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,float16,0,0.012938667088747025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,32,4,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,float16,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,32,8,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,float16,0,0.011018666128317514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,32,1,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,fp8,0,0.014826666563749313
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,32,32,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,float16,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,32,2,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,float16,0,0.012266666938861212
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,32,4,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,float16,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,32,8,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,32,1,128,1,float16,fp8,0,0.010901333143313726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,fp8,0,0.013199999928474426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,32,32,128,1,float16,float16,0,0.013173333058754602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,float16,0,0.010965333630641302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,32,2,128,1,float16,fp8,0,0.012298667182525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,32,4,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,float16,0,0.012245333443085352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,32,8,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,0,0.01642666632930438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,0,0.015471999843915304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,0,0.01923199991385142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,0,0.017301333447297413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,0,0.027327999472618103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,float16,0,0.05606399973233541
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,32,32,128,1,float16,fp8,0,0.041509332756201424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,0,0.025306666890780132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,0,0.015066667149464289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,0,0.013343999783198038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,0,0.017445333302021027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,0,0.018911999960740406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,float16,0,0.029530666768550873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,32,32,128,1,float16,fp8,0,0.025242666403452556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,0,0.01080000028014183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,0,0.011109333485364914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,0,0.014954666296641031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,float16,0,0.019237333287795384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,32,32,128,1,float16,fp8,0,0.019199999670187633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,0,0.010703999549150467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,0,0.010960000256697336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,0,0.011717333147923151
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,0,0.010933333386977514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,0,0.01090666651725769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,float16,0,0.015029333531856537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,32,32,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,0,0.009632000078757605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,0,0.010682666053374609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,0,0.010981333752473196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,0,0.010650667051474253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,0,0.010858666151762009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,0,0.010960000256697336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,32,32,128,1,float16,float16,0,0.012714666624863943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,0,0.010853332777818045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,0,0.009904000287254652
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,0,0.010570666442314783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,0,0.010768000036478043
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,fp8,0,0.011354666203260422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,0,0.010591999938090643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,0,0.010687999427318573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,32,32,128,1,float16,float16,0,0.01080000028014183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,0,0.010165333126982054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,0,0.010832000523805618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,0,0.010863999525705973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,float16,0,0.011109333485364914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,32,32,128,1,float16,fp8,0,0.012442667037248611
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,0,0.010970667004585266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,0,0.010847999403874079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,0,0.010778666784365972
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,0,0.010549332946538925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,float16,0,0.010954666882753372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,32,32,128,1,float16,fp8,0,0.011941333611806234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,0,0.008992000172535578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,0,0.010805333654085795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,0,0.011071999867757162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,0,0.009103999783595404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,0,0.010709332923094431
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,fp8,0,7.9155629475911455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,float16,0,14.725098927815756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,24,1,128,1,float16,float16,0,15.14028803507487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,24,2,128,1,float16,fp8,0,8.978874842325846
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,float16,0,14.577621459960938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,24,4,128,1,float16,fp8,0,8.748501459757486
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,fp8,0,9.26431973775228
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,24,8,128,1,float16,float16,0,14.898778279622396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,float16,0,5.474816004435222
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,24,24,128,1,float16,fp8,0,4.915221214294434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,float16,0,5.044986724853516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,24,1,128,1,float16,fp8,0,4.1795040766398115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,fp8,0,4.056122779846191
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,24,2,128,1,float16,float16,0,4.899034818013509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,float16,0,6.308575948079427
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,24,4,128,1,float16,fp8,0,4.431898752848308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,float16,0,5.07752005259196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,24,8,128,1,float16,fp8,0,4.370143890380859
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,float16,0,2.8220532735188804
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,24,24,128,1,float16,fp8,0,2.5188426971435547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,fp8,0,2.0676053365071616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,24,1,128,1,float16,float16,0,3.010357220967611
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,float16,0,2.8627039591471353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,24,2,128,1,float16,fp8,0,2.0877280235290527
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,float16,0,2.683685302734375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,24,4,128,1,float16,fp8,0,2.4463466008504233
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,float16,0,2.694064140319824
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,24,8,128,1,float16,fp8,0,2.2849334081014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,float16,0,1.4547467231750488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,fp8,0,1.0978506406148274
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,24,1,128,1,float16,float16,0,1.3449974060058594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,24,24,128,1,float16,fp8,0,1.3293813069661458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,fp8,0,1.1181386311848958
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,24,2,128,1,float16,float16,0,1.2922133604685466
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,float16,0,1.412549336751302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,24,4,128,1,float16,fp8,0,1.2226880391438801
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,float16,0,1.350538730621338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,24,8,128,1,float16,fp8,0,1.2091519832611084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,fp8,0,4.599274635314941
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,fp8,0,4.742176055908203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,24,2,128,1,float16,float16,0,5.578896204630534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,24,1,128,1,float16,float16,0,10.24996821085612
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,float16,0,7.038794835408528
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,24,4,128,1,float16,fp8,0,5.305525461832683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,float16,0,6.066522598266602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,24,8,128,1,float16,fp8,0,5.474175771077474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,fp8,0,3.051738739013672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,24,24,128,1,float16,float16,0,3.2166452407836914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,fp8,0,2.37172269821167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,24,1,128,1,float16,float16,0,3.030069351196289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,float16,0,2.748431841532389
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,24,2,128,1,float16,fp8,0,2.7830559412638345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,float16,0,3.063546816507975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,24,4,128,1,float16,fp8,0,3.8427734375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,fp8,0,2.62554661432902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,24,8,128,1,float16,float16,0,4.041776021321614
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,fp8,0,1.5721279780069988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,24,24,128,1,float16,float16,0,2.109648068745931
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,float16,0,1.4212640126546223
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,24,1,128,1,float16,fp8,0,1.6424320538838704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,float16,0,1.4408799807230632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,24,2,128,1,float16,fp8,0,1.422416051228841
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,float16,0,1.588304042816162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,24,4,128,1,float16,fp8,0,1.3793706893920898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,float16,0,1.5293067296346028
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,24,8,128,1,float16,fp8,0,1.3772053718566895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,float16,0,0.9099307060241699
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,float16,0,0.977567990620931
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,24,24,128,1,float16,fp8,0,0.8467946847279867
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,24,1,128,1,float16,fp8,0,0.6746079921722412
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,float16,0,0.7979253133138021
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,24,2,128,1,float16,fp8,0,0.9472533067067465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,float16,0,0.8459839820861816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,24,4,128,1,float16,fp8,0,0.7529973189036051
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,fp8,0,0.8490346272786459
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,24,8,128,1,float16,float16,0,0.8379840056101481
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,fp8,0,3.2663892110188804
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,24,1,128,1,float16,float16,0,3.737157185872396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,float16,0,4.028533299763997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,24,2,128,1,float16,fp8,0,3.3762505849202475
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,float16,0,5.1106611887613935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,24,4,128,1,float16,fp8,0,3.6496639251708984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,float16,0,4.3638505935668945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,float16,0,2.379269282023112
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,24,8,128,1,float16,fp8,0,3.7877171834309897
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,24,24,128,1,float16,fp8,0,2.2683146794637046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,float16,0,2.4853493372599282
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,24,1,128,1,float16,fp8,0,1.9301013946533203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,float16,0,1.9694506327311199
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,24,2,128,1,float16,fp8,0,1.7399519284566243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,fp8,0,1.9004693031311035
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,24,4,128,1,float16,float16,0,2.2263573010762534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,float16,0,2.1251413027445474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,24,8,128,1,float16,fp8,0,1.9172906875610352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,fp8,0,0.8994346459706625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,float16,0,1.251904010772705
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,24,24,128,1,float16,fp8,0,1.1834932963053386
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,24,1,128,1,float16,float16,0,1.1667786439259846
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,float16,0,1.0602933565775554
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,24,2,128,1,float16,fp8,0,0.9199999968210856
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,fp8,0,1.0467840035756428
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,24,4,128,1,float16,float16,0,1.1672693093617756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,float16,0,1.1326719919840496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,24,8,128,1,float16,fp8,0,1.0105866591135662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,float16,0,0.6807946364084879
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,24,24,128,1,float16,fp8,0,0.642906665802002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,fp8,0,0.49134401480356854
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,24,1,128,1,float16,float16,0,0.5753440062204996
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,float16,0,0.5866080125172933
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,24,2,128,1,float16,fp8,0,0.5064053138097128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,float16,0,0.6255520184834799
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,24,4,128,1,float16,fp8,0,0.5592746734619141
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,float16,0,0.6286720037460327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,24,8,128,1,float16,fp8,0,0.561029314994812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,fp8,0,4.275834719340007
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,24,1,128,1,float16,float16,0,4.846650759379069
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,fp8,0,4.491722742716472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,24,2,128,1,float16,float16,0,6.7755788167317705
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,float16,0,6.217360178629558
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,24,4,128,1,float16,fp8,0,5.169765472412109
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,float16,0,7.290463765462239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,24,8,128,1,float16,fp8,0,5.108298619588216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,fp8,0,2.1950720151265464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,fp8,0,3.120181401570638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,24,24,128,1,float16,float16,0,3.276480038960775
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,24,1,128,1,float16,float16,0,2.5806399981180825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,fp8,0,2.2525439262390137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,24,2,128,1,float16,float16,0,2.7053651809692383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,fp8,0,2.6710131963094077
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,24,4,128,1,float16,float16,0,3.131061236063639
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,fp8,0,2.818277359008789
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,24,8,128,1,float16,float16,0,2.8780692418416343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,float16,0,1.6927199363708496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,24,24,128,1,float16,fp8,0,1.6053013801574707
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,float16,0,1.308725357055664
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,24,1,128,1,float16,fp8,0,1.594213326772054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,float16,0,1.384698708852132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,24,2,128,1,float16,fp8,0,1.1810613473256428
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,float16,0,1.4792213439941406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,24,4,128,1,float16,fp8,0,1.310213327407837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,fp8,0,1.3741599718729656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,24,8,128,1,float16,float16,0,1.5302507082621257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,float16,0,0.7052319844563802
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,float16,0,0.8852693239847819
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,24,1,128,1,float16,fp8,0,0.6170986493428549
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,24,24,128,1,float16,fp8,0,0.8428053061167399
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,float16,0,0.7258986632029215
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,24,2,128,1,float16,fp8,0,0.633621335029602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,float16,0,0.8285973072052002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,24,4,128,1,float16,fp8,0,0.7325440247853597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,float16,0,0.7823839982350668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,24,8,128,1,float16,fp8,0,0.7105653285980225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,float16,0,0.485210657119751
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,24,24,128,1,float16,fp8,0,0.4666453202565511
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,float16,0,0.3936213254928589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,24,1,128,1,float16,fp8,0,0.3455359935760498
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,float16,0,0.4078666766484578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,24,2,128,1,float16,fp8,0,0.35161598523457843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,fp8,0,0.3870933453241984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,24,4,128,1,float16,float16,0,0.4451253414154053
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,fp8,0,0.4070719877878825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,24,8,128,1,float16,float16,0,0.43856533368428546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,fp8,0,2.5582666397094727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,24,1,128,1,float16,float16,0,2.875573476155599
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,fp8,0,2.658064047495524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,24,2,128,1,float16,float16,0,2.972112019856771
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,float16,0,3.783583958943685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,24,4,128,1,float16,fp8,0,3.1794560750325522
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,float16,0,3.763573328653971
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,24,8,128,1,float16,fp8,0,3.093973477681478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,fp8,0,1.309658686319987
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,fp8,0,2.0262559254964194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,24,24,128,1,float16,float16,0,2.0405707359313965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,24,1,128,1,float16,float16,0,1.5301119486490886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,float16,0,1.53220796585083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,24,2,128,1,float16,fp8,0,1.3799519538879395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,fp8,0,1.6067627271016438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,24,4,128,1,float16,float16,0,1.789456049601237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,float16,0,1.9677386283874512
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,24,8,128,1,float16,fp8,0,1.6168479919433594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,float16,0,1.0589653650919597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,float16,0,0.8118453025817871
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,24,1,128,1,float16,fp8,0,0.6973439852396647
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,24,24,128,1,float16,fp8,0,1.041162649790446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,float16,0,0.8320960203806559
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,24,2,128,1,float16,fp8,0,0.7229333718617758
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,float16,0,0.9477919737497965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,24,4,128,1,float16,fp8,0,0.8481386502583822
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,float16,0,0.926256020863851
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,24,8,128,1,float16,fp8,0,0.8502506415049235
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,float16,0,0.569109320640564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,24,24,128,1,float16,fp8,0,0.5560213327407837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,float16,0,0.43680532773335773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,24,1,128,1,float16,fp8,0,0.3813600142796834
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,fp8,0,0.4023840030034383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,24,2,128,1,float16,float16,0,0.4557226498921712
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,float16,0,0.5119253396987915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,24,4,128,1,float16,fp8,0,0.46006401379903156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,fp8,0,0.45839468638102215
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,24,8,128,1,float16,float16,0,0.4983893235524495
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,float16,0,0.24736533562342325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,float16,0,0.3176426688830058
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,24,1,128,1,float16,fp8,0,0.21983999013900757
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,24,24,128,1,float16,fp8,0,0.3129280010859172
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,float16,0,0.25205334027608234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,24,2,128,1,float16,fp8,0,0.22722133000691733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,float16,0,0.27197333176930744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,24,4,128,1,float16,fp8,0,0.25355199972788495
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,float16,0,0.28567999601364136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,24,8,128,1,float16,fp8,0,0.2499786615371704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,float16,0,2.7358614603678384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,float16,0,2.95796267191569
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,24,2,128,1,float16,fp8,0,2.6161600748697915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,24,1,128,1,float16,fp8,0,2.4836479822794595
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,fp8,0,3.382495880126953
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,float16,0,3.5314881006876626
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,24,4,128,1,float16,float16,0,4.143258730570476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,24,8,128,1,float16,fp8,0,3.2110026677449546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,float16,0,1.4143786430358887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,24,1,128,1,float16,fp8,0,1.2689706484476726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,float16,0,2.1458187103271484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,24,24,128,1,float16,fp8,0,2.1983200709025064
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,float16,0,1.6111253102620442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,24,2,128,1,float16,fp8,0,1.3549173672993977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,float16,0,1.7613226572672527
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,24,4,128,1,float16,fp8,0,1.748037338256836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,fp8,0,1.662021319071452
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,24,8,128,1,float16,float16,0,1.7403465906778972
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,float16,0,1.1131146748860676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,fp8,0,0.6597280104955038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,24,1,128,1,float16,float16,0,0.7512853145599365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,24,24,128,1,float16,fp8,0,1.1264320214589436
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,float16,0,0.7924959659576416
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,24,2,128,1,float16,fp8,0,0.7049333254496256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,float16,0,0.9291573365529379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,24,4,128,1,float16,fp8,0,0.8532000382741293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,fp8,0,0.8528746763865153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,24,8,128,1,float16,float16,0,0.9548213481903076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,float16,0,0.5893386602401733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,24,24,128,1,float16,fp8,0,0.5955413182576498
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,float16,0,0.4058239857355754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,24,1,128,1,float16,fp8,0,0.3593653440475464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,float16,0,0.4275519847869873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,24,2,128,1,float16,fp8,0,0.3854986826578776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,fp8,0,0.48163731892903644
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,24,4,128,1,float16,float16,0,0.5001226663589478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,float16,0,0.49792532126108807
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,24,8,128,1,float16,fp8,0,0.45999467372894287
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,fp8,0,0.3205173412958781
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,24,24,128,1,float16,float16,0,0.32333866755167645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,float16,0,0.22659200429916382
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,24,1,128,1,float16,fp8,0,0.19964800278345743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,float16,0,0.23653866847356161
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,24,2,128,1,float16,fp8,0,0.21272534132003784
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,fp8,0,0.2429813345273336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,24,4,128,1,float16,float16,0,0.2754506667455037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,float16,0,0.2746986746788025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,24,8,128,1,float16,fp8,0,0.2542666594187419
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,float16,0,0.18194133043289185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,float16,0,0.1327359974384308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,24,24,128,1,float16,fp8,0,0.18050666650136313
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,24,1,128,1,float16,fp8,0,0.12043733398119609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,fp8,0,0.12683199842770895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,float16,0,0.1466506620248159
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,24,4,128,1,float16,fp8,0,0.14140266180038452
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,24,2,128,1,float16,float16,0,0.13753599921862283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,fp8,0,0.1423466702302297
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,24,8,128,1,float16,float16,0,0.14857066671053568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,fp8,0,1.515600045522054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,24,1,128,1,float16,float16,0,1.688202699025472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,float16,0,1.7883200645446777
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,24,2,128,1,float16,fp8,0,1.643141269683838
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,fp8,0,2.0258399645487466
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,24,4,128,1,float16,float16,0,2.297621409098307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,fp8,0,2.071125348409017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,24,8,128,1,float16,float16,0,2.1797919273376465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,float16,0,0.8852480252583822
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,float16,0,1.4287999471028645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,24,24,128,1,float16,fp8,0,1.4856106440226238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,24,1,128,1,float16,fp8,0,0.7883946895599365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,float16,0,0.93339737256368
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,24,2,128,1,float16,fp8,0,0.8495039939880371
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,float16,0,1.1459626356760662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,24,4,128,1,float16,fp8,0,1.1488479773203533
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,float16,0,1.1878186861673992
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,24,8,128,1,float16,fp8,0,1.0593173503875732
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,float16,0,0.7389012972513834
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,float16,0,0.4712800184885661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,24,24,128,1,float16,fp8,0,0.7703626950581869
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,24,1,128,1,float16,fp8,0,0.41845866044362384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,float16,0,0.49526933828989667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,24,2,128,1,float16,fp8,0,0.4503893454869588
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,float16,0,0.6215306520462036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,24,4,128,1,float16,fp8,0,0.5643466711044312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,float16,0,0.5958986679712931
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,24,8,128,1,float16,fp8,0,0.5602560043334961
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,float16,0,0.39673598607381183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,float16,0,0.25756265719731647
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,24,24,128,1,float16,fp8,0,0.40655465920766193
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,fp8,0,0.25406400362650555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,24,1,128,1,float16,fp8,0,0.2312906583150228
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,24,2,128,1,float16,float16,0,0.27856000264485675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,float16,0,0.3335839907328288
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,24,4,128,1,float16,fp8,0,0.31590932607650757
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,fp8,0,0.31802666187286377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,24,8,128,1,float16,float16,0,0.3243359923362732
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,float16,0,0.14320533474286398
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,24,1,128,1,float16,fp8,0,0.13144000371297201
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,float16,0,0.2164693276087443
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,float16,0,0.14804800351460776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,24,24,128,1,float16,fp8,0,0.22313066323598227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,float16,0,0.172325332959493
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,24,2,128,1,float16,fp8,0,0.14014933506647745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,24,4,128,1,float16,fp8,0,0.16220266620318094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,float16,0,0.18714666366577148
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,24,8,128,1,float16,fp8,0,0.16337066888809204
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,float16,0,0.08849599957466125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,float16,0,0.12755733728408813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,24,1,128,1,float16,fp8,0,0.08328533172607422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,24,24,128,1,float16,fp8,0,0.11737599968910217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,float16,0,0.09173333644866943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,24,2,128,1,float16,fp8,0,0.08740267157554626
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,float16,0,0.10233599940935771
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,24,4,128,1,float16,fp8,0,0.09706133604049683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,float16,0,0.10173333684603374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,24,8,128,1,float16,fp8,0,0.09967466195424397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,float16,0,1.6965386072794597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,24,1,128,1,float16,fp8,0,1.542474587758382
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,float16,0,1.830575942993164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,24,2,128,1,float16,fp8,0,1.7229013442993164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,float16,0,2.527344067891439
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,24,4,128,1,float16,fp8,0,2.4780373573303223
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,float16,0,2.3733226458231607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,24,8,128,1,float16,fp8,0,2.2873973846435547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,float16,0,0.8855146567026774
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,float16,0,1.6027785936991374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,24,1,128,1,float16,fp8,0,0.8028266429901123
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,24,24,128,1,float16,fp8,0,1.7286186218261719
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,float16,0,0.946400006612142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,24,2,128,1,float16,fp8,0,0.8831413586934408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,float16,0,1.2610186735788982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,24,4,128,1,float16,fp8,0,1.2807892958323162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,float16,0,1.2248693307240803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,24,8,128,1,float16,fp8,0,1.2672213713328044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,float16,0,0.8293600082397461
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,24,24,128,1,float16,fp8,0,0.8889813423156738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,float16,0,0.47205865383148193
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,24,1,128,1,float16,fp8,0,0.41969064871470135
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,float16,0,0.510645349820455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,24,2,128,1,float16,fp8,0,0.4644533395767212
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,float16,0,0.6607999801635742
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,24,4,128,1,float16,fp8,0,0.5980693499247233
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,float16,0,0.6411199967066447
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,24,8,128,1,float16,fp8,0,0.6455466747283936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,float16,0,0.43747198581695557
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,float16,0,0.25286932786305744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,24,1,128,1,float16,fp8,0,0.22786666949590048
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,24,24,128,1,float16,fp8,0,0.46293866634368896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,fp8,0,0.25387199719746906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,24,2,128,1,float16,float16,0,0.27897600332895917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,float16,0,0.35672001043955487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,24,4,128,1,float16,fp8,0,0.35321064790089923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,float16,0,0.336575984954834
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,24,8,128,1,float16,fp8,0,0.3238079945246379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,float16,0,0.2355466683705648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,float16,0,0.14082133769989014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,24,1,128,1,float16,fp8,0,0.12481600046157837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,24,24,128,1,float16,fp8,0,0.2484053373336792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,fp8,0,0.13634666800498962
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,24,2,128,1,float16,float16,0,0.15289599696795145
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,fp8,0,0.16946667432785034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,24,4,128,1,float16,float16,0,0.1869706710179647
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,float16,0,0.1958720088005066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,24,8,128,1,float16,fp8,0,0.18993600209554037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,fp8,0,0.07543466488520305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,24,1,128,1,float16,float16,0,0.0814879983663559
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,float16,0,0.13762666781743368
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,24,24,128,1,float16,fp8,0,0.13567999998728433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,float16,0,0.08565866947174072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,float16,0,0.09557333588600159
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,24,2,128,1,float16,fp8,0,0.08239999910195668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,24,4,128,1,float16,fp8,0,0.09637332955996196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,float16,0,0.09899733463923137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,24,8,128,1,float16,fp8,0,0.09892266988754272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,float16,0,0.05514666438102722
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,float16,0,0.07096533477306366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,24,1,128,1,float16,fp8,0,0.05256533126036326
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,24,24,128,1,float16,fp8,0,0.07436266541481018
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,fp8,0,0.05426666637261709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,24,2,128,1,float16,float16,0,0.056757330894470215
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,float16,0,0.06192000210285187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,24,4,128,1,float16,fp8,0,0.06272000074386597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,fp8,0,0.06250133117039998
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,24,8,128,1,float16,float16,0,0.062352001667022705
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,float16,0,1.0944480101267497
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,24,1,128,1,float16,fp8,0,0.998469352722168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,float16,0,1.2127573490142822
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,24,2,128,1,float16,fp8,0,1.1181386311848958
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,float16,0,1.6880693435668945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,24,4,128,1,float16,fp8,0,1.4781440099080403
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,float16,0,1.1237813631693523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,float16,0,1.5860479672749836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,24,8,128,1,float16,fp8,0,1.550063927968343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,24,24,128,1,float16,fp8,0,1.2206079959869385
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,float16,0,0.5713813304901123
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,float16,0,0.6304800113042196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,24,1,128,1,float16,fp8,0,0.5219626824061075
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,24,2,128,1,float16,fp8,0,0.5818453232447306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,float16,0,0.8433067003885905
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,24,4,128,1,float16,fp8,0,0.8863360087076823
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,float16,0,0.8367626667022705
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,24,8,128,1,float16,fp8,0,0.8554399808247884
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,float16,0,0.5804586807886759
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,fp8,0,0.27879466613133747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,24,1,128,1,float16,float16,0,0.3083626627922058
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,24,24,128,1,float16,fp8,0,0.6273920138676962
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,float16,0,0.33036800225575763
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,24,2,128,1,float16,fp8,0,0.3051733374595642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,fp8,0,0.423802653948466
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,24,4,128,1,float16,float16,0,0.45348799228668213
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,float16,0,0.4619786739349365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,24,8,128,1,float16,fp8,0,0.4224319855372111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,float16,0,0.30429333448410034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,float16,0,0.16586132844289145
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,24,1,128,1,float16,fp8,0,0.1525920033454895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,24,24,128,1,float16,fp8,0,0.33005332946777344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,fp8,0,0.1723733345667521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,24,2,128,1,float16,float16,0,0.18226132790247598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,float16,0,0.2414240042368571
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,24,4,128,1,float16,fp8,0,0.23724265893300375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,float16,0,0.23556800683339438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,24,8,128,1,float16,fp8,0,0.23044800758361816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,fp8,0,0.1784320076306661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,24,24,128,1,float16,float16,0,0.16762133439381918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,fp8,0,0.08658132950464885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,24,1,128,1,float16,float16,0,0.09151466687520345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,float16,0,0.09665600458780925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,24,2,128,1,float16,fp8,0,0.09259733557701111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,fp8,0,0.11648000280062358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,24,4,128,1,float16,float16,0,0.12073066830635071
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,float16,0,0.13212266564369202
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,24,8,128,1,float16,fp8,0,0.11870400110880534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,float16,0,0.058634668588638306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,float16,0,0.09445333480834961
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,24,1,128,1,float16,fp8,0,0.054005334774653115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,24,24,128,1,float16,fp8,0,0.08905067046483357
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,float16,0,0.06038400034109751
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,24,2,128,1,float16,fp8,0,0.05820266902446747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,float16,0,0.06856533388296764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,24,4,128,1,float16,fp8,0,0.06955199937025706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,float16,0,0.06910933554172516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,24,8,128,1,float16,fp8,0,0.07027733325958252
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,float16,0,0.0356480007370313
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,24,1,128,1,float16,fp8,0,0.03358400116364161
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,fp8,0,0.052229334910710655
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,24,24,128,1,float16,float16,0,0.044938668608665466
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,float16,0,0.036464000741640724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,24,2,128,1,float16,fp8,0,0.03554133325815201
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,float16,0,0.039701332648595176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,24,4,128,1,float16,fp8,0,0.04160533348719279
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,float16,0,0.04125333329041799
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,24,8,128,1,float16,fp8,0,0.041936000188191734
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,float16,0,1.1766400337219238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,24,1,128,1,float16,fp8,0,1.0823573271433513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,fp8,0,1.2458133697509766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,24,2,128,1,float16,float16,0,1.3120426336924236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,float16,0,1.9815999666849773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,24,4,128,1,float16,fp8,0,1.9947840372721355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,float16,0,1.818826675415039
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,24,8,128,1,float16,fp8,0,1.9889973004659016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,float16,0,0.6140480041503906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,24,1,128,1,float16,fp8,0,0.5655466715494791
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,float16,0,1.336202621459961
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,24,24,128,1,float16,fp8,0,1.4987680117289226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,float16,0,0.6779946486155192
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,24,2,128,1,float16,fp8,0,0.6415520111719767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,float16,0,1.0233066876729329
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,float16,0,0.9615093072255453
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,24,4,128,1,float16,fp8,0,1.0522879759470622
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,24,8,128,1,float16,fp8,0,1.0340906778971355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,float16,0,0.6861013571421305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,24,24,128,1,float16,fp8,0,0.7658080259958903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,float16,0,0.3283573389053345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,24,1,128,1,float16,fp8,0,0.2942986687024434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,float16,0,0.36869335174560547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,24,2,128,1,float16,fp8,0,0.3426186641057332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,float16,0,0.5052640040715536
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,24,4,128,1,float16,fp8,0,0.5415893395741781
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,float16,0,0.51201065381368
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,24,8,128,1,float16,fp8,0,0.48528532187143963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,float16,0,0.17369065682093301
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,fp8,0,0.39774401982625324
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,24,24,128,1,float16,float16,0,0.35818131764729816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,24,1,128,1,float16,fp8,0,0.16182399789492288
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,float16,0,0.1922826568285624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,24,2,128,1,float16,fp8,0,0.18128534158070883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,float16,0,0.2788426677385966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,24,4,128,1,float16,fp8,0,0.27568533023198444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,float16,0,0.2569546699523926
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,24,8,128,1,float16,fp8,0,0.27664534250895184
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,float16,0,0.0953546663125356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,float16,0,0.19104532400767008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,24,1,128,1,float16,fp8,0,0.08701866865158081
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,24,24,128,1,float16,fp8,0,0.2108693321545919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,float16,0,0.10693333546320598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,24,2,128,1,float16,fp8,0,0.09589866797129314
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,float16,0,0.14622400204340616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,24,4,128,1,float16,fp8,0,0.13750400145848593
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,float16,0,0.14095466335614523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,24,8,128,1,float16,fp8,0,0.14268267154693604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,float16,0,0.05705066521962484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,24,1,128,1,float16,fp8,0,0.05557866891225179
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,float16,0,0.10652800401051839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,24,24,128,1,float16,fp8,0,0.11343466242154439
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,float16,0,0.05919999877611796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,24,2,128,1,float16,fp8,0,0.05927466849486033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,float16,0,0.07126933336257935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,24,4,128,1,float16,fp8,0,0.07468266785144806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,float16,0,0.07147199908892314
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,24,8,128,1,float16,fp8,0,0.0753653347492218
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,float16,0,0.039146666725476585
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,fp8,0,0.06001066664854685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,24,1,128,1,float16,fp8,0,0.03812266637881597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,24,24,128,1,float16,float16,0,0.05219733218352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,float16,0,0.039936001102129616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,24,2,128,1,float16,fp8,0,0.03977599988381068
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,float16,0,0.046053335070610046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,24,4,128,1,float16,fp8,0,0.048122664292653404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,float16,0,0.046181331078211464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,24,8,128,1,float16,fp8,0,0.04840533435344696
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,float16,0,0.02739199995994568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,24,1,128,1,float16,fp8,0,0.026144000391165417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,float16,0,0.03430933256944021
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,float16,0,0.027823999524116516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,24,24,128,1,float16,fp8,0,0.03815466662247976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,24,2,128,1,float16,fp8,0,0.027477333943049114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,fp8,0,0.03156800071398417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,24,4,128,1,float16,float16,0,0.031343999008337654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,fp8,0,0.03173866619666418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,24,8,128,1,float16,float16,0,0.029968000948429108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,float16,0,0.9161972999572754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,24,1,128,1,float16,fp8,0,0.8581439654032389
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,fp8,0,1.0137813091278076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,24,2,128,1,float16,float16,0,1.0576852957407634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,float16,0,1.7224799791971843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,24,4,128,1,float16,fp8,0,1.768986701965332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,float16,0,1.585386594136556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,24,8,128,1,float16,fp8,0,1.5831039746602376
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,float16,0,1.1994826793670654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,24,24,128,1,float16,fp8,0,1.3813066482543945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,float16,0,0.4835040171941121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,24,1,128,1,float16,fp8,0,0.44278931617736816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,float16,0,0.5445226828257242
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,24,2,128,1,float16,fp8,0,0.52456001440684
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,float16,0,0.888048013051351
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,24,4,128,1,float16,fp8,0,0.8536480267842611
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,float16,0,0.8852852980295817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,24,8,128,1,float16,fp8,0,0.8746346632639567
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,float16,0,0.6173760096232096
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,24,24,128,1,float16,fp8,0,0.7031520207722982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,float16,0,0.2556053400039673
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,24,1,128,1,float16,fp8,0,0.23190933465957642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,float16,0,0.29707733790079754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,24,2,128,1,float16,fp8,0,0.2737813393274943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,float16,0,0.422874649365743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,24,4,128,1,float16,fp8,0,0.42588265736897785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,24,8,128,1,float16,float16,0,0.4223466714223226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,24,8,128,1,float16,fp8,0,0.47679467995961505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,float16,0,0.3205920060475667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,24,24,128,1,float16,fp8,0,0.3636853297551473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,float16,0,0.13614933689435324
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,24,1,128,1,float16,fp8,0,0.1309653321901957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,float16,0,0.15972800056139627
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,24,2,128,1,float16,fp8,0,0.15267200271288553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,float16,0,0.23820799589157104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,24,4,128,1,float16,fp8,0,0.2512106696764628
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,float16,0,0.22549333175023398
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,24,8,128,1,float16,fp8,0,0.22445333003997803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,float16,0,0.16935465733210245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,float16,0,0.07460266848405202
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,24,24,128,1,float16,fp8,0,0.1896479924519857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,24,1,128,1,float16,fp8,0,0.06849066913127899
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,float16,0,0.08427199721336365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,24,2,128,1,float16,fp8,0,0.07634133100509644
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,float16,0,0.12492266297340393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,24,4,128,1,float16,fp8,0,0.11246400078137715
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,float16,0,0.12087999780972798
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,24,8,128,1,float16,fp8,0,0.13359999656677246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,float16,0,0.09371733665466309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,float16,0,0.04452799757321676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,float16,0,0.04751466711362203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,24,24,128,1,float16,fp8,0,0.10145066181818645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,24,1,128,1,float16,fp8,0,0.042949333786964417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,24,2,128,1,float16,fp8,0,0.047728002071380615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,float16,0,0.05862399935722351
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,24,4,128,1,float16,fp8,0,0.06266133487224579
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,fp8,0,0.06419733166694641
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,24,8,128,1,float16,float16,0,0.06011733412742615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,float16,0,0.030991998811562855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,float16,0,0.04372266431649526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,24,24,128,1,float16,fp8,0,0.05195199946562449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,24,1,128,1,float16,fp8,0,0.029637334247430164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,float16,0,0.032730666299661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,24,2,128,1,float16,fp8,0,0.03164266546567281
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,float16,0,0.03774933268626531
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,24,4,128,1,float16,fp8,0,0.03962666789690653
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,fp8,0,0.0415786678592364
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,24,8,128,1,float16,float16,0,0.03758399933576584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,float16,0,0.021301334102948506
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,24,1,128,1,float16,fp8,0,0.02123733361562093
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,fp8,0,0.03330666571855545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,24,24,128,1,float16,float16,0,0.027621333797772724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,float16,0,0.023130667706330616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,24,2,128,1,float16,fp8,0,0.0230880007147789
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,float16,0,0.024688000480333965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,24,4,128,1,float16,fp8,0,0.02535466601451238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,fp8,0,0.02752533306678136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,24,8,128,1,float16,float16,0,0.025237334271272022
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,float16,0,0.021146667500336964
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,24,1,128,1,float16,fp8,0,0.02109866589307785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,fp8,0,0.025263999899228413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,float16,0,0.021136000752449036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,24,24,128,1,float16,float16,0,0.023605334262053173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,24,2,128,1,float16,fp8,0,0.021168000996112823
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,float16,0,0.021333334346612293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,24,4,128,1,float16,fp8,0,0.021488000949223835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,float16,0,0.021829334398110706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,24,8,128,1,float16,fp8,0,0.023130667706330616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,float16,0,0.39237332344055176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,24,1,128,1,float16,fp8,0,0.3762133518854777
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,fp8,0,0.459663987159729
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,24,2,128,1,float16,float16,0,0.46215466658274335
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,float16,0,0.7538560231526693
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,24,4,128,1,float16,fp8,0,0.7773439884185791
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,fp8,0,0.8434453010559082
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,24,8,128,1,float16,float16,0,0.8029759724934896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,float16,0,0.5701386531194051
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,float16,0,0.2080693244934082
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,24,1,128,1,float16,fp8,0,0.19785600900650024
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,24,24,128,1,float16,fp8,0,0.6662346522013346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,float16,0,0.24116800228754678
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,24,2,128,1,float16,fp8,0,0.2413546641667684
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,float16,0,0.39210132757822674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,24,4,128,1,float16,fp8,0,0.44356266657511395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,float16,0,0.38099201520284015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,24,8,128,1,float16,fp8,0,0.39487465222676593
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,float16,0,0.29822399218877155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,24,24,128,1,float16,fp8,0,0.3455413182576497
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,float16,0,0.11412800351778667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,float16,0,0.13752532998720804
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,24,1,128,1,float16,fp8,0,0.10833066701889038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,24,2,128,1,float16,fp8,0,0.13030933340390524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,float16,0,0.21581333875656128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,24,4,128,1,float16,fp8,0,0.23152534166971842
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,fp8,0,0.2061013380686442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,24,8,128,1,float16,float16,0,0.21809599796930948
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,float16,0,0.15904000401496887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,float16,0,0.06431999802589417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,float16,0,0.0761599987745285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,24,1,128,1,float16,fp8,0,0.05958400170008341
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,24,24,128,1,float16,fp8,0,0.18203200896581015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,24,2,128,1,float16,fp8,0,0.06851733227570851
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,float16,0,0.10915199915568034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,24,4,128,1,float16,fp8,0,0.10371733705202739
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,float16,0,0.11754133303960164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,24,8,128,1,float16,fp8,0,0.11441600322723389
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,float16,0,0.03770133356253306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,float16,0,0.08575466275215149
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,24,24,128,1,float16,fp8,0,0.09573866923650105
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,24,1,128,1,float16,fp8,0,0.036346666514873505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,float16,0,0.04108799993991852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,float16,0,0.05182399849096934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,24,4,128,1,float16,fp8,0,0.0561653325955073
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,24,2,128,1,float16,fp8,0,0.04166933397452036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,float16,0,0.0524533341328303
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,24,8,128,1,float16,fp8,0,0.05801600217819214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,fp8,0,0.0480320006608963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,float16,0,0.0272533322374026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,24,24,128,1,float16,float16,0,0.03989866624275843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,24,1,128,1,float16,fp8,0,0.027285332481066387
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,float16,0,0.029290666182835896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,24,2,128,1,float16,fp8,0,0.029509333272775013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,float16,0,0.033514666060606636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,24,4,128,1,float16,fp8,0,0.03746666759252548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,float16,0,0.03469866762558619
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,24,8,128,1,float16,fp8,0,0.03746666759252548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,float16,0,0.017370666066805523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,fp8,0,0.03126399964094162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,24,24,128,1,float16,float16,0,0.02550400048494339
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,24,1,128,1,float16,fp8,0,0.017184000462293625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,float16,0,0.019280000279347103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,24,2,128,1,float16,fp8,0,0.02086399992307027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,float16,0,0.021253332495689392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,24,4,128,1,float16,fp8,0,0.0235359991590182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,fp8,0,0.02364266663789749
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,24,8,128,1,float16,float16,0,0.02258133391539256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,float16,0,0.017237332959969837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,float16,0,0.021338666478792827
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,24,24,128,1,float16,fp8,0,0.023130667706330616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,24,1,128,1,float16,fp8,0,0.01722666621208191
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,float16,0,0.01758933315674464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,24,2,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,float16,0,0.018976000448067982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,24,4,128,1,float16,fp8,0,0.019285333653291065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,float16,0,0.019178666174411774
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,24,8,128,1,float16,fp8,0,0.019482667247454327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,float16,0,0.017456000049908955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,fp8,0,0.01916266605257988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,24,24,128,1,float16,float16,0,0.018922666708628338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,24,1,128,1,float16,fp8,0,0.017397332936525345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,float16,0,0.01735466718673706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,24,2,128,1,float16,fp8,0,0.01735466718673706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,float16,0,0.017456000049908955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,24,4,128,1,float16,fp8,0,0.017242666333913803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,float16,0,0.017375999440749485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,24,8,128,1,float16,fp8,0,0.01756799966096878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,float16,0,0.2369813323020935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,24,1,128,1,float16,fp8,0,0.22394132614135742
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,float16,0,0.28014934062957764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,24,2,128,1,float16,fp8,0,0.268069326877594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,float16,0,0.4123573303222656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,24,4,128,1,float16,fp8,0,0.4702133337656657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,float16,0,0.4025973478953044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,24,8,128,1,float16,fp8,0,0.4309973319371541
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,float16,0,0.3081386685371399
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,24,24,128,1,float16,fp8,0,0.3564000129699707
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,float16,0,0.12711466352144876
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,24,1,128,1,float16,fp8,0,0.12057066957155864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,fp8,0,0.24409600098927817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,24,4,128,1,float16,float16,0,0.22924800713857016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,float16,0,0.14430933197339377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,24,2,128,1,float16,fp8,0,0.14215466380119324
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,float16,0,0.21732799212137857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,24,8,128,1,float16,fp8,0,0.2254400054613749
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,float16,0,0.16351999839146933
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,24,24,128,1,float16,fp8,0,0.18545599778493246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,fp8,0,0.06433066725730896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,24,1,128,1,float16,float16,0,0.07044800122578938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,float16,0,0.07871999839941661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,24,2,128,1,float16,fp8,0,0.07311466832955678
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,float16,0,0.12027733524640401
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,24,4,128,1,float16,fp8,0,0.11961066722869873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,float16,0,0.12145599722862244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,24,8,128,1,float16,fp8,0,0.12984533111254373
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,float16,0,0.03966933240493139
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,fp8,0,0.09623466928799947
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,24,24,128,1,float16,float16,0,0.09411733349164327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,24,1,128,1,float16,fp8,0,0.039701332648595176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,float16,0,0.04368533194065094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,fp8,0,0.05991466840108236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,24,4,128,1,float16,float16,0,0.05418666700522105
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,24,2,128,1,float16,fp8,0,0.04358399907747904
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,float16,0,0.055530667304992676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,24,8,128,1,float16,fp8,0,0.06016000111897787
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,float16,0,0.02515733242034912
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,float16,0,0.03979199876387914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,24,24,128,1,float16,fp8,0,0.045978665351867676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,24,1,128,1,float16,fp8,0,0.025445332129796345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,float16,0,0.027274665733178455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,float16,0,0.03188266605138779
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,24,4,128,1,float16,fp8,0,0.035418666899204254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,24,2,128,1,float16,fp8,0,0.027429332335789997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,float16,0,0.03345066557327906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,24,8,128,1,float16,fp8,0,0.0353973334034284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,float16,0,0.019120000302791595
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,float16,0,0.01942933350801468
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,fp8,0,0.03176533430814743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,24,24,128,1,float16,float16,0,0.025472000241279602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,24,1,128,1,float16,fp8,0,0.01915733392039935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,24,2,128,1,float16,fp8,0,0.021183999876181286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,float16,0,0.023157333334287006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,24,4,128,1,float16,fp8,0,0.025226667523384094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,float16,0,0.02312533309062322
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,24,8,128,1,float16,fp8,0,0.025285333395004272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,float16,0,0.015082667271296183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,fp8,0,0.021333334346612293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,24,1,128,1,float16,fp8,0,0.01482133318980535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,float16,0,0.01482133318980535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,24,24,128,1,float16,float16,0,0.01721599946419398
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,24,2,128,1,float16,fp8,0,0.014933332800865173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,float16,0,0.015008000036080679
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,24,4,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,float16,0,0.01534933348496755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,24,8,128,1,float16,fp8,0,0.01720533271630605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,fp8,0,0.015119999647140503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,24,24,128,1,float16,float16,0,0.01522133375207583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,24,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,float16,0,0.013034666577974955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,24,2,128,1,float16,fp8,0,0.01321600005030632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,float16,0,0.013301332791646322
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,24,4,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,24,8,128,1,float16,float16,0,0.013823999712864557
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,float16,0,0.014677333335081736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,float16,0,0.013242666920026144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,float16,0,0.01310933381319046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,24,1,128,1,float16,fp8,0,0.013455999394257864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,24,24,128,1,float16,fp8,0,0.01498666654030482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,24,2,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,float16,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,24,4,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,24,8,128,1,float16,float16,0,0.013248000293970108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,float16,0,0.18795732657114664
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,24,1,128,1,float16,fp8,0,0.16688533624013266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,float16,0,0.2055573264757792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,float16,0,0.29074666897455853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,24,4,128,1,float16,fp8,0,0.29043734073638916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,float16,0,0.27053866783777875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,24,2,128,1,float16,fp8,0,0.19023466110229492
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,float16,0,0.1931040088335673
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,24,8,128,1,float16,fp8,0,0.271071990331014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,float16,0,0.10251200199127197
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,24,24,128,1,float16,fp8,0,0.21065600713094076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,24,1,128,1,float16,fp8,0,0.08873066306114197
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,float16,0,0.1095360020796458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,float16,0,0.14722133676211038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,24,2,128,1,float16,fp8,0,0.09804800152778625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,24,4,128,1,float16,fp8,0,0.13900799552599588
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,float16,0,0.10458667079607646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,float16,0,0.15177599589029947
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,24,8,128,1,float16,fp8,0,0.15502400199572244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,float16,0,0.053786665201187134
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,24,24,128,1,float16,fp8,0,0.10995733737945557
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,24,1,128,1,float16,fp8,0,0.04986133178075155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,float16,0,0.05665066838264465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,24,2,128,1,float16,fp8,0,0.05586666862169901
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,fp8,0,0.07046933472156525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,24,4,128,1,float16,float16,0,0.0680320014556249
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,float16,0,0.07467733323574066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,24,8,128,1,float16,fp8,0,0.07223999996980031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,float16,0,0.0336053321758906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,float16,0,0.047151997685432434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,24,24,128,1,float16,fp8,0,0.05336533486843109
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,24,1,128,1,float16,fp8,0,0.031498665610949196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,float16,0,0.03437866767247518
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,24,2,128,1,float16,fp8,0,0.03364799916744232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,float16,0,0.03956266740957896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,24,4,128,1,float16,fp8,0,0.04170133173465729
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,float16,0,0.03993066648642222
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,24,8,128,1,float16,fp8,0,0.04166933397452036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,fp8,0,0.03336533407370249
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,24,24,128,1,float16,float16,0,0.028442665934562683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,float16,0,0.021226666867733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,24,1,128,1,float16,fp8,0,0.02088533341884613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,float16,0,0.02310933421055476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,float16,0,0.025349333882331848
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,24,2,128,1,float16,fp8,0,0.022597332795461018
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,24,4,128,1,float16,fp8,0,0.025466665625572205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,float16,0,0.025487999121348064
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,24,8,128,1,float16,fp8,0,0.02735466758410136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,float16,0,0.017093333105246227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,float16,0,0.019797333826621372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,24,1,128,1,float16,fp8,0,0.01718933383623759
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,24,24,128,1,float16,fp8,0,0.023189333577950794
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,float16,0,0.017055999487638474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,24,2,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,float16,0,0.01931200052301089
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,24,4,128,1,float16,fp8,0,0.019146667172511418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,float16,0,0.01904533306757609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,float16,0,0.01498666654030482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,24,8,128,1,float16,fp8,0,0.01911466692884763
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,fp8,0,0.015178666760524115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,float16,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,24,1,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,24,24,128,1,float16,float16,0,0.015018666783968607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,24,4,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,24,2,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,float16,0,0.013237333546082178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,24,8,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,float16,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,float16,0,0.013274667163689932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,24,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,float16,0,0.012842666357755661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,24,24,128,1,float16,fp8,0,0.01492799942692121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,float16,0,0.01312000056107839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,24,2,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,24,4,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,24,8,128,1,float16,float16,0,0.013104000439246496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,float16,0,0.012944000462690989
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,float16,0,0.012986666212479273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,24,24,128,1,float16,fp8,0,0.014885333677132925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,24,1,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,24,2,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,float16,0,0.013088000317414602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,24,4,128,1,float16,fp8,0,0.013178666432698568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,24,8,128,1,float16,float16,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,float16,0,0.17112000783284506
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,24,1,128,1,float16,fp8,0,0.14017599821090698
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,float16,0,0.1811359922091166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,24,2,128,1,float16,fp8,0,0.1507306694984436
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,float16,0,0.21861867109934488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,24,4,128,1,float16,fp8,0,0.19340799252192178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,float16,0,0.13799466689427695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,24,24,128,1,float16,fp8,0,0.13614400227864584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,float16,0,0.22059732675552368
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,24,8,128,1,float16,fp8,0,0.20771199464797974
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,float16,0,0.08761066198348999
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,24,1,128,1,float16,fp8,0,0.0745600014925003
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,fp8,0,0.08067200084527333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,24,2,128,1,float16,float16,0,0.09170666337013245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,fp8,0,0.09501866499582927
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,24,4,128,1,float16,float16,0,0.10269332925478618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,float16,0,0.1125973363717397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,24,8,128,1,float16,fp8,0,0.09714667002360027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,float16,0,0.07111466427644093
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,24,24,128,1,float16,fp8,0,0.0646613339583079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,float16,0,0.04966933528582255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,24,1,128,1,float16,fp8,0,0.04354133208592733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,fp8,0,0.04587733248869578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,24,2,128,1,float16,float16,0,0.051183998584747314
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,float16,0,0.05653866628805796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,24,4,128,1,float16,fp8,0,0.05266666909058889
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,fp8,0,0.054048001766204834
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,24,8,128,1,float16,float16,0,0.05625066657861074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,float16,0,0.03679466744263967
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,24,24,128,1,float16,fp8,0,0.03984533250331879
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,float16,0,0.029343999922275543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,24,1,128,1,float16,fp8,0,0.02733866622050603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,float16,0,0.03133333226044973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,24,2,128,1,float16,fp8,0,0.029605334003766377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,float16,0,0.032629333436489105
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,24,4,128,1,float16,fp8,0,0.032885332902272545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,float16,0,0.03380800038576126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,24,8,128,1,float16,fp8,0,0.03340800106525421
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,float16,0,0.02037866661945979
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,24,1,128,1,float16,fp8,0,0.019317333896954853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,float16,0,0.0234400009115537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,24,24,128,1,float16,fp8,0,0.025418666501839954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,float16,0,0.021130666136741638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,24,2,128,1,float16,fp8,0,0.019296000401178997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,float16,0,0.021146667500336964
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,24,4,128,1,float16,fp8,0,0.021397332350413006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,float16,0,0.021354667842388153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,24,8,128,1,float16,fp8,0,0.02142400046189626
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,float16,0,0.01700266698996226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,fp8,0,0.017674667139848072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,24,24,128,1,float16,float16,0,0.01695999999841054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,24,1,128,1,float16,fp8,0,0.016458666572968166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,float16,0,0.017029333859682083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,24,2,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,float16,0,0.01524266724785169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,24,4,128,1,float16,fp8,0,0.01701333373785019
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,float16,0,0.017077332983414333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,24,8,128,1,float16,fp8,0,0.015301333119471868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,float16,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,float16,0,0.013269333789745966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,24,1,128,1,float16,fp8,0,0.012837332983811697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,24,24,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,24,4,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,24,2,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,float16,0,0.013242666920026144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,24,8,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,float16,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,24,1,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,float16,0,0.013189333180586496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,24,24,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,24,2,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,float16,0,0.012256000190973282
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,24,4,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,float16,0,0.013002666334311167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,24,8,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,24,1,128,1,float16,float16,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,float16,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,24,24,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,float16,0,0.012725333372751871
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,24,2,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,float16,0,0.011077333241701126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,24,4,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,float16,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,24,8,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,fp8,0,0.12814933061599731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,24,1,128,1,float16,float16,0,0.15870400269826254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,float16,0,0.16078399618466696
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,24,2,128,1,float16,fp8,0,0.13235200444857279
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,float16,0,0.18718934059143066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,24,4,128,1,float16,fp8,0,0.14645333091417947
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,float16,0,0.11133866508801778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,24,24,128,1,float16,fp8,0,0.09558399518330891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,fp8,0,0.1527786652247111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,24,8,128,1,float16,float16,0,0.18799465894699097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,float16,0,0.08515200018882751
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,24,1,128,1,float16,fp8,0,0.06866133213043213
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,float16,0,0.08689066767692566
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,float16,0,0.09180266658465068
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,24,4,128,1,float16,fp8,0,0.07894933223724365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,24,2,128,1,float16,fp8,0,0.07226133346557617
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,float16,0,0.09308800101280212
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,float16,0,0.054986665646235146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,float16,0,0.04824000100294749
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,24,8,128,1,float16,fp8,0,0.07904533545176189
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,24,24,128,1,float16,fp8,0,0.052229334910710655
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,float16,0,0.05016533533732096
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,24,2,128,1,float16,fp8,0,0.041802664597829185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,24,1,128,1,float16,fp8,0,0.039877332746982574
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,float16,0,0.052101333936055504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,24,4,128,1,float16,fp8,0,0.04595733185609182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,float16,0,0.04997866849104563
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,24,8,128,1,float16,fp8,0,0.04563199977080027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,float16,0,0.027690666417280834
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,24,1,128,1,float16,fp8,0,0.02555199960867564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,fp8,0,0.031632001201311745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,24,24,128,1,float16,float16,0,0.03160000095764796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,float16,0,0.027349332968393963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,24,2,128,1,float16,fp8,0,0.025290665527184803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,float16,0,0.03054933249950409
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,24,4,128,1,float16,fp8,0,0.02756800005833308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,fp8,0,0.028362666567166645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,24,8,128,1,float16,float16,0,0.02940800040960312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,float16,0,0.02124800036350886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,float16,0,0.019237333287795384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,24,24,128,1,float16,fp8,0,0.021253332495689392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,24,1,128,1,float16,fp8,0,0.018976000448067982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,float16,0,0.01941866676012675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,24,2,128,1,float16,fp8,0,0.019120000302791595
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,float16,0,0.01937599976857503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,24,4,128,1,float16,fp8,0,0.018954666952292126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,float16,0,0.02004266654451688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,24,8,128,1,float16,fp8,0,0.019258666783571243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,float16,0,0.015386667102575302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,24,1,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,fp8,0,0.01720533271630605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,float16,0,0.015216000378131866
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,24,24,128,1,float16,float16,0,0.017269333203633625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,24,2,128,1,float16,fp8,0,0.014949332922697067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,float16,0,0.01509333277742068
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,24,4,128,1,float16,fp8,0,0.015311999867359797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,float16,0,0.015450666348139444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,24,8,128,1,float16,fp8,0,0.015109332899252573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,float16,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,24,1,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,fp8,0,0.013861333330472311
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,24,24,128,1,float16,float16,0,0.013461332768201828
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,float16,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,24,2,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,float16,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,24,4,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,24,8,128,1,float16,float16,0,0.013317332913478216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,float16,0,0.01228800043463707
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,float16,0,0.013253333667914072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,24,1,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,float16,0,0.011055999745925268
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,24,24,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,24,2,128,1,float16,fp8,0,0.013050666699806849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,float16,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,24,4,128,1,float16,fp8,0,0.011349332829316458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,24,8,128,1,float16,float16,0,0.012730666746695837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,24,1,128,1,float16,fp8,0,0.011733333269755045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,float16,0,0.013141332815090815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,24,24,128,1,float16,fp8,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,float16,0,0.012661332885424295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,24,2,128,1,float16,fp8,0,0.012613333761692047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,float16,0,0.013141332815090815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,24,4,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,float16,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,24,8,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,0,0.01624533285697301
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,0,0.015130666395028433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,0,0.019039999693632126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,0,0.0173333336909612
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,0,0.025333332518736523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,0,0.02759466568628947
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,float16,0,0.04231466849644979
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,24,24,128,1,float16,fp8,0,0.033615998923778534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,0,0.01498666654030482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,0,0.013797332843144735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,0,0.01913600042462349
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,float16,0,0.023247999449570973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,24,24,128,1,float16,fp8,0,0.023285334308942158
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,0,0.010960000256697336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,0,0.01116266722480456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,0,0.011338666081428528
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,0,0.014837333311637243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,0,0.013455999394257864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,float16,0,0.017157333592573803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,24,24,128,1,float16,fp8,0,0.017152000218629837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,0,0.0106133334338665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,0,0.010970667004585266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,0,0.011178666104873022
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,0,0.011194666226704916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,0,0.011317333827416102
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,float16,0,0.013130666067202887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,0,0.01108266661564509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,0,0.009941333283980688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,24,24,128,1,float16,fp8,0,0.013786666095256805
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,0,0.010832000523805618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,0,0.011098666737476984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,float16,0,0.012138667205969492
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,24,24,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,0,0.011071999867757162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,0,0.011173332730929056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,0,0.010213333492477735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,0,0.010703999549150467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,0,0.010863999525705973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,fp8,0,0.010911999891201654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,24,24,128,1,float16,float16,0,0.011440000186363855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,0,0.009205333267649015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,0,0.01073066641887029
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,0,0.00984533317387104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,0,0.010682666053374609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,0,0.008949333180983862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,fp8,0,0.011637333780527115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,24,24,128,1,float16,float16,0,0.010842667271693548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,0,0.01089599976936976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,0,0.010709332923094431
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,0,0.009685333197315535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,0,0.010026666646202406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,0,0.010464000205198923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,float16,0,0.010842667271693548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,24,24,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,0,0.009253333633144697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,0,0.010629333555698395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,0,0.009338666374484697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,0,0.009925333162148794
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,0,0.009717333440979322
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,0,0.010821333775917688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,fp8,0,5.210661252339681
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,fp8,0,5.393802642822266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,16,1,128,1,float16,float16,0,7.517199834187825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,16,2,128,1,float16,float16,0,7.5747629801432295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,fp8,0,6.136213302612305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,16,4,128,1,float16,float16,0,8.697280248006185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,float16,0,8.45083745320638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,16,8,128,1,float16,fp8,0,6.1492156982421875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,float16,0,3.8096640904744468
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,16,16,128,1,float16,fp8,0,3.2851521174112954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,fp8,0,2.6880372365315757
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,16,1,128,1,float16,float16,0,3.186058680216471
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,fp8,0,2.7532052993774414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,16,2,128,1,float16,float16,0,3.8091840744018555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,fp8,0,3.0919520060221353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,16,4,128,1,float16,float16,0,4.590421358744304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,float16,0,3.476463953653971
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,float16,0,1.8671894073486328
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,16,8,128,1,float16,fp8,0,3.5752960840861
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,16,16,128,1,float16,fp8,0,1.701466719309489
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,fp8,0,1.4193919499715169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,16,1,128,1,float16,float16,0,2.005685329437256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,float16,0,1.6722559928894043
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,16,2,128,1,float16,fp8,0,1.4625013669331868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,float16,0,1.8221759796142578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,16,4,128,1,float16,fp8,0,1.5849493344624836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,float16,0,1.8020480473836262
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,float16,0,1.0068639914194744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,16,8,128,1,float16,fp8,0,2.3680639266967773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,16,16,128,1,float16,fp8,0,0.9281760056813558
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,float16,0,0.9057119687398275
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,16,1,128,1,float16,fp8,0,0.7702186902364095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,fp8,0,0.7991253534952799
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,16,2,128,1,float16,float16,0,0.9199466705322266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,float16,0,0.9835466543833414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,16,4,128,1,float16,fp8,0,0.9003413518269857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,fp8,0,0.8669546445210775
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,16,8,128,1,float16,float16,0,0.9795839786529541
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,fp8,0,3.0621973673502603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,16,1,128,1,float16,float16,0,3.6573012669881186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,fp8,0,3.201711972554525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,16,2,128,1,float16,float16,0,4.059167861938477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,float16,0,4.031370798746745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,16,4,128,1,float16,fp8,0,3.762997309366862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,float16,0,4.343434651692708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,16,8,128,1,float16,fp8,0,3.6027841567993164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,float16,0,2.1918346087137857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,fp8,0,1.5860692660013835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,16,16,128,1,float16,fp8,0,2.038405259450277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,16,1,128,1,float16,float16,0,2.4945972760518393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,float16,0,1.9134186108907063
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,16,2,128,1,float16,fp8,0,1.648373285929362
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,float16,0,2.1831092834472656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,16,4,128,1,float16,fp8,0,1.869157314300537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,fp8,0,1.87118927637736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,16,8,128,1,float16,float16,0,2.105130672454834
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,fp8,0,1.0791626771291096
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,16,16,128,1,float16,float16,0,1.170698642730713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,float16,0,0.9866986274719238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,16,1,128,1,float16,fp8,0,1.11081067721049
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,float16,0,1.0170293649037678
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,16,2,128,1,float16,fp8,0,0.8815999825795492
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,float16,0,1.1264959971110027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,16,4,128,1,float16,fp8,0,1.303615967432658
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,float16,0,1.1391039689381917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,16,8,128,1,float16,fp8,0,1.0421226819356282
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,float16,0,0.6337759892145792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,float16,0,0.5578133265177408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,16,1,128,1,float16,fp8,0,0.47863467534383136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,16,16,128,1,float16,fp8,0,0.5921013355255127
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,float16,0,0.5702293316523234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,16,2,128,1,float16,fp8,0,0.49371735254923504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,float16,0,0.6235306660334269
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,16,4,128,1,float16,fp8,0,0.5493280092875162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,float16,0,0.6272480090459188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,16,8,128,1,float16,fp8,0,0.5591253439585367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,fp8,0,2.1975413958231607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,16,1,128,1,float16,float16,0,2.5970826148986816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,float16,0,2.7057228088378906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,16,2,128,1,float16,fp8,0,2.305306593577067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,float16,0,2.9758294423421225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,16,4,128,1,float16,fp8,0,2.9319893519083657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,fp8,0,2.778970718383789
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,16,8,128,1,float16,float16,0,3.3369973500569663
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,float16,0,1.6146772702534993
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,float16,0,1.3377973238627117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,16,16,128,1,float16,fp8,0,1.5249759356180828
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,16,1,128,1,float16,fp8,0,1.1440213521321614
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,fp8,0,1.2201386292775471
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,16,2,128,1,float16,float16,0,1.3697867393493652
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,float16,0,1.577290693918864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,16,4,128,1,float16,fp8,0,1.3864906628926594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,fp8,0,1.3824693361918132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,16,8,128,1,float16,float16,0,1.5392212867736816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,float16,0,0.8719200293223063
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,16,16,128,1,float16,fp8,0,0.8110079765319824
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,float16,0,0.7239786783854166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,16,1,128,1,float16,fp8,0,0.6997013092041016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,fp8,0,0.6571679910024008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,16,2,128,1,float16,float16,0,0.7535200119018555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,float16,0,0.8370933532714844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,16,4,128,1,float16,fp8,0,0.7656213442484537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,fp8,0,0.7489013671875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,16,8,128,1,float16,float16,0,0.8267093499501547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,float16,0,0.48102935155232746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,float16,0,0.4107946554819743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,16,1,128,1,float16,fp8,0,0.35866133371988934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,16,16,128,1,float16,fp8,0,0.4517279863357544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,fp8,0,0.3696213165918986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,fp8,0,0.4102880160013835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,16,2,128,1,float16,float16,0,0.42074668407440186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,16,4,128,1,float16,float16,0,0.4651413361231486
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,fp8,0,0.4260053237279256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,16,8,128,1,float16,float16,0,0.4643839995066325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,fp8,0,2.8485228220621743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,16,1,128,1,float16,float16,0,3.2410879135131836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,fp8,0,3.0064214070638022
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,16,2,128,1,float16,float16,0,3.3877172470092773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,float16,0,4.392106691996257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,16,4,128,1,float16,fp8,0,3.76146666208903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,float16,0,4.015935897827148
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,16,8,128,1,float16,fp8,0,3.673765182495117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,float16,0,2.4822239875793457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,float16,0,1.6903200149536133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,16,16,128,1,float16,fp8,0,2.0709813435872397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,16,1,128,1,float16,fp8,0,1.469173272450765
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,float16,0,1.7579466501871746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,16,2,128,1,float16,fp8,0,1.5393013954162598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,fp8,0,1.7958134015401204
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,16,4,128,1,float16,float16,0,2.0293973286946616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,float16,0,2.1455252965291343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,float16,0,1.1246453126271565
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,16,8,128,1,float16,fp8,0,1.9433226585388184
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,float16,0,0.9184693495432535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,16,16,128,1,float16,fp8,0,1.0801653067270915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,16,1,128,1,float16,fp8,0,0.9184746742248535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,fp8,0,0.8268746534983317
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,16,2,128,1,float16,float16,0,0.9396320184071859
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,float16,0,1.0705119768778484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,16,4,128,1,float16,fp8,0,0.9543146292368571
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,fp8,0,0.9684106508890787
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,16,8,128,1,float16,float16,0,1.1171627044677734
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,float16,0,0.607205351193746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,float16,0,0.499781330426534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,16,16,128,1,float16,fp8,0,0.5822720130284628
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,16,1,128,1,float16,fp8,0,0.43271998564402264
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,float16,0,0.5147999922434489
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,16,2,128,1,float16,fp8,0,0.45366934935251874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,fp8,0,0.5611413319905599
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,16,4,128,1,float16,float16,0,0.5979573329289755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,float16,0,0.5747520128885905
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,16,8,128,1,float16,fp8,0,0.5393120050430298
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,fp8,0,0.3290613293647766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,16,16,128,1,float16,float16,0,0.34252798557281494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,fp8,0,0.2565760016441345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,16,1,128,1,float16,float16,0,0.2848693331082662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,float16,0,0.29207466046015423
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,16,2,128,1,float16,fp8,0,0.2611839969952901
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,fp8,0,0.29362666606903076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,16,4,128,1,float16,float16,0,0.3203253348668416
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,float16,0,0.33877333005269367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,16,8,128,1,float16,fp8,0,0.30209600925445557
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,float16,0,1.9378132820129395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,16,1,128,1,float16,fp8,0,1.7180479367574055
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,float16,0,2.04528538386027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,16,2,128,1,float16,fp8,0,1.819658597310384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,fp8,0,2.2071305910746255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,16,4,128,1,float16,float16,0,3.0504639943440757
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,fp8,0,2.248997370402018
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,16,8,128,1,float16,float16,0,2.429466724395752
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,float16,0,1.0413973331451416
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,float16,0,1.3704373041788738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,16,1,128,1,float16,fp8,0,0.8889973163604736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,16,16,128,1,float16,fp8,0,1.3456586201985676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,float16,0,1.0981706778208415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,16,2,128,1,float16,fp8,0,0.960197369257609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,float16,0,1.2815199693044026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,16,4,128,1,float16,fp8,0,1.2566986878712971
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,float16,0,1.322981357574463
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,16,8,128,1,float16,fp8,0,1.2362453142801921
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,float16,0,0.7205653190612793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,16,16,128,1,float16,fp8,0,0.7063360214233398
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,fp8,0,0.486410657564799
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,16,1,128,1,float16,float16,0,0.5606613159179688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,float16,0,0.5778666734695435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,16,2,128,1,float16,fp8,0,0.5123840173085531
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,fp8,0,0.6608213186264038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,16,4,128,1,float16,float16,0,0.6966613133748373
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,float16,0,0.7041973272959391
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,16,8,128,1,float16,fp8,0,0.6671040058135986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,float16,0,0.30615999301274616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,16,1,128,1,float16,fp8,0,0.2696586648623149
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,float16,0,0.395087997118632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,16,16,128,1,float16,fp8,0,0.38658666610717773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,float16,0,0.3272800048192342
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,16,2,128,1,float16,fp8,0,0.2815626660982768
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,fp8,0,0.35468800862630206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,16,4,128,1,float16,float16,0,0.38547201951344806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,float16,0,0.37380798657735187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,16,8,128,1,float16,fp8,0,0.3500106732050578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,float16,0,0.18004266421000162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,fp8,0,0.21863466501235962
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,16,16,128,1,float16,float16,0,0.22670400142669678
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,16,1,128,1,float16,fp8,0,0.1623360017935435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,float16,0,0.18491733074188232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,16,2,128,1,float16,fp8,0,0.16885866721471152
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,fp8,0,0.192848006884257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,16,4,128,1,float16,float16,0,0.20223466555277506
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,float16,0,0.20654932657877603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,16,8,128,1,float16,fp8,0,0.19515732924143472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,fp8,0,1.652959982554118
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,16,1,128,1,float16,float16,0,1.844538688659668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,fp8,0,1.7987146377563477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,16,2,128,1,float16,float16,0,1.9846506118774414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,fp8,0,2.3562560081481934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,16,4,128,1,float16,float16,0,2.661365350087484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,float16,0,2.5491627057393393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,16,8,128,1,float16,fp8,0,2.535045305887858
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,float16,0,1.0464106400807698
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,float16,0,1.43612273534139
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,16,16,128,1,float16,fp8,0,1.46670929590861
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,16,1,128,1,float16,fp8,0,0.8449973265329996
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,float16,0,1.0485333601633708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,16,2,128,1,float16,fp8,0,0.9310080210367838
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,float16,0,1.3148639996846516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,16,4,128,1,float16,fp8,0,1.200112024943034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,float16,0,1.3741493225097656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,16,8,128,1,float16,fp8,0,1.3229333559672039
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,float16,0,0.7553866704305013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,float16,0,0.5159306526184082
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,16,16,128,1,float16,fp8,0,0.7596373558044434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,16,1,128,1,float16,fp8,0,0.4580213228861491
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,float16,0,0.5566666523615519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,16,2,128,1,float16,fp8,0,0.5010506709416708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,fp8,0,0.6316320101420084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,16,4,128,1,float16,float16,0,0.7263039747873942
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,float16,0,0.6971039772033691
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,16,8,128,1,float16,fp8,0,0.6461120049158732
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,float16,0,0.2853013277053833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,float16,0,0.3998826742172241
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,16,16,128,1,float16,fp8,0,0.4044106801350911
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,16,1,128,1,float16,fp8,0,0.2512586712837219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,float16,0,0.30189333359400433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,16,2,128,1,float16,fp8,0,0.27139200766881305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,float16,0,0.3731040159861247
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,16,4,128,1,float16,fp8,0,0.37508801619211835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,float16,0,0.3666613499323527
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,16,8,128,1,float16,fp8,0,0.3768800099690755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,float16,0,0.22466667493184408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,fp8,0,0.14500266313552856
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,16,1,128,1,float16,float16,0,0.16088533401489258
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,16,16,128,1,float16,fp8,0,0.22283732891082764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,fp8,0,0.15452266732851663
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,16,2,128,1,float16,float16,0,0.1697280009587606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,fp8,0,0.18678400913874307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,16,4,128,1,float16,float16,0,0.19700799385706583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,float16,0,0.21683200200398764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,16,8,128,1,float16,fp8,0,0.19840532541275024
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,fp8,0,0.12152533729871114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,16,16,128,1,float16,float16,0,0.12598933776219687
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,fp8,0,0.09300800164540608
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,16,1,128,1,float16,float16,0,0.10159466663996379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,float16,0,0.10526933272679646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,16,2,128,1,float16,fp8,0,0.09718400239944458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,float16,0,0.1172106663386027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,16,4,128,1,float16,fp8,0,0.1133013367652893
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,float16,0,0.11712533235549927
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,16,8,128,1,float16,fp8,0,0.11421333750089009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,fp8,0,1.0167146523793538
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,16,1,128,1,float16,float16,0,1.1494346459706624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,float16,0,1.2416959603627522
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,16,2,128,1,float16,fp8,0,1.1315146287282307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,fp8,0,1.523695945739746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,16,4,128,1,float16,float16,0,1.7486507097880046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,float16,0,1.6737546920776367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,16,8,128,1,float16,fp8,0,1.5636533101399739
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,float16,0,0.6105920076370239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,16,1,128,1,float16,fp8,0,0.535647988319397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,float16,0,0.9521493117014567
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,16,16,128,1,float16,fp8,0,0.9891093571980795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,float16,0,0.6602720022201538
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,16,2,128,1,float16,fp8,0,0.5902506510416666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,float16,0,0.8651253382364908
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,16,4,128,1,float16,fp8,0,0.7849760055541992
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,fp8,0,0.817039966583252
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,16,8,128,1,float16,float16,0,0.8574186960856119
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,float16,0,0.4991146723429362
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,16,16,128,1,float16,fp8,0,0.5173600117365519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,float16,0,0.3248639901479085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,16,1,128,1,float16,fp8,0,0.2923626701037089
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,float16,0,0.35120534896850586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,16,2,128,1,float16,fp8,0,0.32050132751464844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,fp8,0,0.44628798961639404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,16,4,128,1,float16,float16,0,0.4687039852142334
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,fp8,0,0.43997331460316974
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,16,8,128,1,float16,float16,0,0.4794880151748657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,float16,0,0.2696853280067444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,float16,0,0.18151466051737467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,16,16,128,1,float16,fp8,0,0.27674667040507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,16,1,128,1,float16,fp8,0,0.15869333346684775
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,fp8,0,0.17264533042907715
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,16,2,128,1,float16,float16,0,0.19574934244155884
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,float16,0,0.24828267097473145
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,16,4,128,1,float16,fp8,0,0.23333332935969034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,fp8,0,0.24050132433573404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,16,8,128,1,float16,float16,0,0.250874658425649
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,float16,0,0.15130133430163065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,16,16,128,1,float16,fp8,0,0.15255467096964517
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,float16,0,0.10458133618036906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,16,1,128,1,float16,fp8,0,0.09531733393669128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,float16,0,0.10989866654078166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,16,2,128,1,float16,fp8,0,0.10429333647092183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,fp8,0,0.12609600027402243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,16,4,128,1,float16,float16,0,0.12793599565823874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,float16,0,0.136954665184021
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,16,8,128,1,float16,fp8,0,0.1291306714216868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,float16,0,0.06628266473611195
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,16,1,128,1,float16,fp8,0,0.06339199841022491
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,float16,0,0.08063466846942902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,16,16,128,1,float16,fp8,0,0.08311999837557475
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,float16,0,0.06861333549022675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,16,2,128,1,float16,fp8,0,0.0664213349421819
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,float16,0,0.07878933350245158
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,16,4,128,1,float16,fp8,0,0.0767680009206136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,fp8,0,0.07885866860548656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,16,8,128,1,float16,float16,0,0.07912000020345052
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,float16,0,1.141034682591756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,16,1,128,1,float16,fp8,0,1.0231733322143555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,float16,0,1.277077356974284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,16,2,128,1,float16,fp8,0,1.189519961675008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,float16,0,1.9662399291992188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,16,4,128,1,float16,fp8,0,1.9575413068135579
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,float16,0,1.8076640764872234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,float16,0,1.0696693261464436
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,16,8,128,1,float16,fp8,0,1.7575680414835613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,float16,0,0.6017760038375854
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,16,1,128,1,float16,fp8,0,0.5326506694157919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,16,16,128,1,float16,fp8,0,1.1493866443634033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,float16,0,0.6764106750488281
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,16,2,128,1,float16,fp8,0,0.6119146744410197
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,float16,0,0.9451946417490641
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,16,4,128,1,float16,fp8,0,0.8715253671010336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,float16,0,1.0156160195668538
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,16,8,128,1,float16,fp8,0,1.0061333179473877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,float16,0,0.5528853336970011
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,float16,0,0.31995199124018353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,16,1,128,1,float16,fp8,0,0.2895306746164958
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,16,16,128,1,float16,fp8,0,0.5939040184020996
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,float16,0,0.35341866811116535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,16,2,128,1,float16,fp8,0,0.3320213357607524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,float16,0,0.4999839862187703
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,16,4,128,1,float16,fp8,0,0.5369813442230225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,float16,0,0.48395200570424396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,16,8,128,1,float16,fp8,0,0.5086400111516317
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,float16,0,0.2916533350944519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,fp8,0,0.15711466471354166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,16,1,128,1,float16,float16,0,0.17605332533518472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,16,16,128,1,float16,fp8,0,0.3118346730868022
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,float16,0,0.19372800985972086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,16,2,128,1,float16,fp8,0,0.17914666732152304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,float16,0,0.27742934226989746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,16,4,128,1,float16,fp8,0,0.2574933369954427
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,float16,0,0.2620053291320801
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,16,8,128,1,float16,fp8,0,0.28304533163706463
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,float16,0,0.1604106624921163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,float16,0,0.09596799810727437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,16,16,128,1,float16,fp8,0,0.1693013310432434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,16,1,128,1,float16,fp8,0,0.08981333176294963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,fp8,0,0.09885332981745402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,16,2,128,1,float16,float16,0,0.1039520005385081
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,float16,0,0.14197867115338644
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,16,4,128,1,float16,fp8,0,0.1306880017121633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,float16,0,0.15223466356595358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,16,8,128,1,float16,fp8,0,0.1427893340587616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,fp8,0,0.08571733037630717
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,16,16,128,1,float16,float16,0,0.09122666716575623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,fp8,0,0.05820266902446747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,16,1,128,1,float16,float16,0,0.06058133145173391
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,float16,0,0.06587199866771698
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,16,2,128,1,float16,fp8,0,0.06392533580462138
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,float16,0,0.07646400233109792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,16,4,128,1,float16,fp8,0,0.07737066845099132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,fp8,0,0.08046933511892955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,16,8,128,1,float16,float16,0,0.07679466903209686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,float16,0,0.04667733112970988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,16,16,128,1,float16,fp8,0,0.05179733534653982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,float16,0,0.039701332648595176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,16,1,128,1,float16,fp8,0,0.037589333951473236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,fp8,0,0.039546666045983635
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,16,2,128,1,float16,float16,0,0.04026666780312856
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,float16,0,0.04607999821503957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,16,4,128,1,float16,fp8,0,0.04790933430194855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,float16,0,0.04560000201066335
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,16,8,128,1,float16,fp8,0,0.047877331574757896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,float16,0,0.7442773183186849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,16,1,128,1,float16,fp8,0,0.6612533330917358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,float16,0,0.8456693490346273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,16,2,128,1,float16,fp8,0,0.7806560198465983
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,float16,0,1.3099146684010823
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,16,4,128,1,float16,fp8,0,1.3106773694356282
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,fp8,0,1.2120906511942546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,16,8,128,1,float16,float16,0,1.2322826385498047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,float16,0,0.7431999842325846
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,16,16,128,1,float16,fp8,0,0.8101973533630371
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,float16,0,0.3949546813964844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,16,1,128,1,float16,fp8,0,0.35203198591868085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,float16,0,0.44867201646169025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,16,2,128,1,float16,fp8,0,0.4148373206456502
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,fp8,0,0.6096746524175009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,16,4,128,1,float16,float16,0,0.6524480183919271
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,float16,0,0.6529386838277181
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,16,8,128,1,float16,fp8,0,0.6376266479492188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,float16,0,0.38633068402608234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,16,16,128,1,float16,fp8,0,0.41929598649342853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,fp8,0,0.1941279967625936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,16,1,128,1,float16,float16,0,0.21015467246373495
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,float16,0,0.24088533719380698
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,16,2,128,1,float16,fp8,0,0.22738667329152426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,float16,0,0.3580000003178914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,16,4,128,1,float16,fp8,0,0.3467573324839274
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,fp8,0,0.34487998485565186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,16,8,128,1,float16,float16,0,0.3586133321126302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,float16,0,0.20558400948842367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,float16,0,0.11432533462842305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,16,1,128,1,float16,fp8,0,0.10248532891273499
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,16,16,128,1,float16,fp8,0,0.22107734282811484
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,float16,0,0.13352533181508383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,16,2,128,1,float16,fp8,0,0.11355732878049214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,float16,0,0.18637865781784058
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,16,4,128,1,float16,fp8,0,0.18222399552663168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,fp8,0,0.18604799111684164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,16,8,128,1,float16,float16,0,0.19021334250768027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,float16,0,0.11355732878049214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,float16,0,0.06749333441257477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,16,1,128,1,float16,fp8,0,0.062037333846092224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,16,16,128,1,float16,fp8,0,0.11953066786130269
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,float16,0,0.07218133409818013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,16,2,128,1,float16,fp8,0,0.07019199927647908
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,float16,0,0.08864532907803853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,16,4,128,1,float16,fp8,0,0.09324799974759419
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,float16,0,0.0953386624654134
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,16,8,128,1,float16,fp8,0,0.09643200039863586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,float16,0,0.05739733576774597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,float16,0,0.04417066772778829
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,16,16,128,1,float16,fp8,0,0.062261333068211876
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,float16,0,0.04771733283996582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,16,1,128,1,float16,fp8,0,0.04243200023969015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,16,2,128,1,float16,fp8,0,0.045834665497144066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,fp8,0,0.056799997886021934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,16,4,128,1,float16,float16,0,0.054570664962132774
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,fp8,0,0.057861333092053734
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,16,8,128,1,float16,float16,0,0.05596266686916351
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,float16,0,0.03155199935038885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,float16,0,0.037589333951473236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,16,16,128,1,float16,fp8,0,0.04178666571776072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,16,1,128,1,float16,fp8,0,0.032170665760835014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,float16,0,0.033514666060606636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,float16,0,0.03772266705830892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,16,2,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,16,4,128,1,float16,fp8,0,0.03965866565704346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,float16,0,0.037690666814645134
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,16,8,128,1,float16,fp8,0,0.03984000037113825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,float16,0,0.8063519795735677
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,float16,0,0.9204586346944174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,16,2,128,1,float16,fp8,0,0.8808426856994629
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,16,1,128,1,float16,fp8,0,0.7163626352945963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,float16,0,1.6194079717000325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,16,4,128,1,float16,fp8,0,1.3179252942403157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,float16,0,1.4602773984273274
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,16,8,128,1,float16,fp8,0,1.5871307055155437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,float16,0,0.41541866461435956
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,float16,0,0.8948907057444254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,16,1,128,1,float16,fp8,0,0.3732000192006429
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,16,16,128,1,float16,fp8,0,0.987226645151774
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,float16,0,0.48788265387217206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,16,2,128,1,float16,fp8,0,0.46056532859802246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,float16,0,0.7806666692097982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,16,4,128,1,float16,fp8,0,0.7205866972605387
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,float16,0,0.7396053473154703
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,float16,0,0.2228053410847982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,16,8,128,1,float16,fp8,0,0.8505919774373373
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,fp8,0,0.5050719976425171
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,16,16,128,1,float16,float16,0,0.45586133003234863
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,16,1,128,1,float16,fp8,0,0.19979200760523477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,float16,0,0.25593066215515137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,16,2,128,1,float16,fp8,0,0.24635199705759683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,fp8,0,0.38120532035827637
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,16,4,128,1,float16,float16,0,0.4134346644083659
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,float16,0,0.3892800013224284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,16,8,128,1,float16,fp8,0,0.41333866119384766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,float16,0,0.12145066261291504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,fp8,0,0.26603732506434125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,16,16,128,1,float16,float16,0,0.23947733640670776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,16,1,128,1,float16,fp8,0,0.10920533537864685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,float16,0,0.14065066973368326
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,16,2,128,1,float16,fp8,0,0.13194666306177774
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,float16,0,0.21440533796946207
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,16,4,128,1,float16,fp8,0,0.213754673798879
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,float16,0,0.2263466715812683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,16,8,128,1,float16,fp8,0,0.22984532515207926
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,float16,0,0.12980266412099203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,float16,0,0.06672533353169759
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,16,16,128,1,float16,fp8,0,0.14170666535695395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,16,1,128,1,float16,fp8,0,0.06418666740258534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,fp8,0,0.07236266632874806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,16,2,128,1,float16,float16,0,0.07250666618347168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,float16,0,0.1032373309135437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,16,4,128,1,float16,fp8,0,0.10409599542617798
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,fp8,0,0.10864532987276714
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,16,8,128,1,float16,float16,0,0.12186132868131001
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,float16,0,0.0415786678592364
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,float16,0,0.07132799923419952
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,16,16,128,1,float16,fp8,0,0.0684853345155716
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,16,1,128,1,float16,fp8,0,0.039877332746982574
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,float16,0,0.045706664522488914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,16,2,128,1,float16,fp8,0,0.04587733248869578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,float16,0,0.056143999099731445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,16,4,128,1,float16,fp8,0,0.06001066664854685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,float16,0,0.05663999915122986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,16,8,128,1,float16,fp8,0,0.06227200229962667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,fp8,0,0.03965866565704346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,16,16,128,1,float16,float16,0,0.03572266548871994
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,float16,0,0.027269333600997925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,16,1,128,1,float16,fp8,0,0.027317332724730175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,float16,0,0.029477333029111225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,16,2,128,1,float16,fp8,0,0.029215998947620392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,float16,0,0.03352533280849457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,16,4,128,1,float16,fp8,0,0.0364479993780454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,float16,0,0.03428266694148382
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,16,8,128,1,float16,fp8,0,0.03754133234421412
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,float16,0,0.02935466667016347
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,float16,0,0.025360000630219776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,16,16,128,1,float16,fp8,0,0.031386665999889374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,16,1,128,1,float16,fp8,0,0.023717333873112995
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,float16,0,0.025231999655564625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,16,2,128,1,float16,fp8,0,0.025429333249727886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,float16,0,0.029311999678611755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,16,4,128,1,float16,fp8,0,0.02924799919128418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,float16,0,0.029071999092896778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,16,8,128,1,float16,fp8,0,0.030975999931494396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,float16,0,0.6385120153427124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,16,1,128,1,float16,fp8,0,0.5629173517227173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,float16,0,0.7623466650644938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,16,2,128,1,float16,fp8,0,0.7242293357849121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,fp8,0,1.487808068593343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,16,4,128,1,float16,float16,0,1.4379146893819172
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,float16,0,1.253941297531128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,float16,0,0.7983840306599935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,16,16,128,1,float16,fp8,0,0.9069120089213053
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,float16,0,0.33738664786020917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,16,8,128,1,float16,fp8,0,1.318837324778239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,16,1,128,1,float16,fp8,0,0.29736000299453735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,fp8,0,0.38014400005340576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,16,2,128,1,float16,float16,0,0.40611199537913006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,float16,0,0.6792853673299154
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,16,4,128,1,float16,fp8,0,0.7477386792500814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,float16,0,0.4090026617050171
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,float16,0,0.6489866574605306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,16,8,128,1,float16,fp8,0,0.6600693464279175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,16,16,128,1,float16,fp8,0,0.4621973435084025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,float16,0,0.1767359972000122
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,16,1,128,1,float16,fp8,0,0.15811199943224588
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,float16,0,0.21040532986323038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,16,2,128,1,float16,fp8,0,0.20415467023849487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,float16,0,0.3564213514328003
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,16,4,128,1,float16,fp8,0,0.4066773255666097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,float16,0,0.3476800123850505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,float16,0,0.21199466784795126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,16,8,128,1,float16,fp8,0,0.3933493296305339
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,fp8,0,0.08567999800046285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,16,16,128,1,float16,fp8,0,0.24041599035263062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,16,1,128,1,float16,float16,0,0.09490666786829631
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,float16,0,0.11312533418337505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,16,2,128,1,float16,fp8,0,0.10665067036946614
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,fp8,0,0.2023306687672933
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,16,4,128,1,float16,float16,0,0.1981546680132548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,float16,0,0.18347734212875366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,16,8,128,1,float16,fp8,0,0.18557333946228027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,float16,0,0.1132319966952006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,16,16,128,1,float16,fp8,0,0.1276586651802063
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,float16,0,0.05193600058555603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,16,1,128,1,float16,fp8,0,0.04897066454092661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,fp8,0,0.059706668059031166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,float16,0,0.09194667140642802
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,16,2,128,1,float16,float16,0,0.05781333148479462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,16,4,128,1,float16,fp8,0,0.08886933326721191
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,fp8,0,0.09388267000516255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,16,8,128,1,float16,float16,0,0.10667199889818828
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,float16,0,0.058176000912984215
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,16,16,128,1,float16,fp8,0,0.060346667965253196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,float16,0,0.03201066702604294
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,16,1,128,1,float16,fp8,0,0.03163733333349228
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,float16,0,0.03552533437808355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,16,2,128,1,float16,fp8,0,0.03736533224582672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,float16,0,0.04667200148105621
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,16,4,128,1,float16,fp8,0,0.05211733281612396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,float16,0,0.046853333711624146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,16,8,128,1,float16,fp8,0,0.05310399830341339
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,float16,0,0.021173333128293354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,float16,0,0.029552000264326733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,16,16,128,1,float16,fp8,0,0.035445332527160645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,16,1,128,1,float16,fp8,0,0.021562665700912476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,float16,0,0.022991999983787537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,float16,0,0.029322666426499683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,16,2,128,1,float16,fp8,0,0.02349333216746648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,16,4,128,1,float16,fp8,0,0.031445334355036415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,float16,0,0.027535999814669292
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,16,8,128,1,float16,fp8,0,0.0315733328461647
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,float16,0,0.019088000059127808
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,float16,0,0.023061332603295643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,16,1,128,1,float16,fp8,0,0.0189280000825723
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,16,16,128,1,float16,fp8,0,0.025216000775496166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,float16,0,0.019573333362738293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,float16,0,0.023423999547958374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,16,2,128,1,float16,fp8,0,0.02096533278624217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,16,4,128,1,float16,fp8,0,0.0232640008131663
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,float16,0,0.02345066765944163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,16,8,128,1,float16,fp8,0,0.025455998877684276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,float16,0,0.019167999426523846
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,float16,0,0.021040000021457672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,16,16,128,1,float16,fp8,0,0.021146667500336964
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,16,1,128,1,float16,fp8,0,0.01911466692884763
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,float16,0,0.019029332945744198
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,float16,0,0.01977066695690155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,16,4,128,1,float16,fp8,0,0.021312000850836437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,16,2,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,fp8,0,0.021205333371957142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,16,8,128,1,float16,float16,0,0.01933866615096728
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,float16,0,0.26506133874257404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,16,1,128,1,float16,fp8,0,0.24152000745137533
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,float16,0,0.33552531401316327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,16,2,128,1,float16,fp8,0,0.3311840097109477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,float16,0,0.650874654452006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,16,4,128,1,float16,fp8,0,0.6461973190307617
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,float16,0,0.37223466237386066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,16,16,128,1,float16,fp8,0,0.4365333318710327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,fp8,0,0.720192035039266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,float16,0,0.1383573313554128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,16,8,128,1,float16,float16,0,0.6681866645812988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,16,1,128,1,float16,fp8,0,0.13125866651535034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,float16,0,0.18246400356292725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,16,2,128,1,float16,fp8,0,0.17913599809010824
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,fp8,0,0.37703998883565265
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,16,4,128,1,float16,float16,0,0.32340800762176514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,float16,0,0.30753066142400104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,float16,0,0.1933599909146627
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,16,8,128,1,float16,fp8,0,0.3657386700312297
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,16,16,128,1,float16,fp8,0,0.22468799352645874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,float16,0,0.07610133290290833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,16,1,128,1,float16,fp8,0,0.0685280015071233
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,float16,0,0.09224533041318257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,float16,0,0.17728533347447714
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,16,2,128,1,float16,fp8,0,0.09116266171137492
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,16,4,128,1,float16,fp8,0,0.195306658744812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,float16,0,0.15786133209864298
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,float16,0,0.10533333818117778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,16,8,128,1,float16,fp8,0,0.19851199785868326
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,16,16,128,1,float16,fp8,0,0.12032000223795573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,float16,0,0.04207466542720795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,16,1,128,1,float16,fp8,0,0.042037333051363625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,float16,0,0.047728002071380615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,16,2,128,1,float16,fp8,0,0.05206400156021118
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,float16,0,0.07792533437410991
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,16,4,128,1,float16,fp8,0,0.08236266672611237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,fp8,0,0.08711999654769897
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,16,8,128,1,float16,float16,0,0.09700799981753032
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,float16,0,0.053632001082102455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,16,16,128,1,float16,fp8,0,0.054287999868392944
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,float16,0,0.026565333207448322
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,16,1,128,1,float16,fp8,0,0.025519999365011852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,float16,0,0.029472000896930695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,16,2,128,1,float16,fp8,0,0.031471999982992806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,float16,0,0.04041066765785217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,16,4,128,1,float16,fp8,0,0.045594667394955955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,float16,0,0.041749333341916404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,16,8,128,1,float16,fp8,0,0.047685335079828896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,fp8,0,0.03146133323510488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,16,16,128,1,float16,float16,0,0.025594666600227356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,float16,0,0.017887999614079792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,16,1,128,1,float16,fp8,0,0.01929066702723503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,fp8,0,0.021189334491888683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,16,2,128,1,float16,float16,0,0.01937599976857503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,float16,0,0.025392000873883564
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,16,4,128,1,float16,fp8,0,0.027701333165168762
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,float16,0,0.025568000972270966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,16,8,128,1,float16,fp8,0,0.02940800040960312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,float16,0,0.021274665991465252
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,float16,0,0.016906666258970898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,16,16,128,1,float16,fp8,0,0.023498666783173878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,16,1,128,1,float16,fp8,0,0.01586666703224182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,fp8,0,0.018405333161354065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,16,2,128,1,float16,float16,0,0.01720000058412552
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,float16,0,0.019296000401178997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,16,4,128,1,float16,fp8,0,0.021173333128293354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,float16,0,0.02089066555102666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,16,8,128,1,float16,fp8,0,0.021136000752449036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,fp8,0,0.018922666708628338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,float16,0,0.015130666395028433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,16,16,128,1,float16,float16,0,0.01743999992807706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,16,1,128,1,float16,fp8,0,0.015024000157912573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,fp8,0,0.015216000378131866
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,16,2,128,1,float16,float16,0,0.014906667172908783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,float16,0,0.017082666357358296
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,16,4,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,float16,0,0.017423999806245167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,16,8,128,1,float16,fp8,0,0.017045332739750545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,float16,0,0.015098666151364645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,float16,0,0.016938666502634685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,16,1,128,1,float16,fp8,0,0.01498666654030482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,16,16,128,1,float16,fp8,0,0.01666133354107539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,float16,0,0.015397333850463232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,16,2,128,1,float16,fp8,0,0.01526933287580808
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,fp8,0,0.015274666249752045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,16,4,128,1,float16,float16,0,0.015018666783968607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,float16,0,0.015194666882356008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,16,8,128,1,float16,fp8,0,0.0162773331006368
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,fp8,0,0.1476800044377645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,16,1,128,1,float16,float16,0,0.16019200285275778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,float16,0,0.20160533984502158
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,16,2,128,1,float16,fp8,0,0.1909439961115519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,float16,0,0.3404639959335327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,16,4,128,1,float16,fp8,0,0.32682132720947266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,float16,0,0.32621333996454877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,float16,0,0.20339733362197876
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,16,16,128,1,float16,fp8,0,0.23407467206319174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,16,8,128,1,float16,fp8,0,0.3431253433227539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,float16,0,0.08711466193199158
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,16,1,128,1,float16,fp8,0,0.08065600196520488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,float16,0,0.10935999949773152
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,16,2,128,1,float16,fp8,0,0.10197333494822185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,fp8,0,0.20673600832621256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,16,4,128,1,float16,float16,0,0.18877333402633667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,float16,0,0.169487992922465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,16,8,128,1,float16,fp8,0,0.1811199982961019
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,fp8,0,0.12152533729871114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,16,16,128,1,float16,float16,0,0.10784000158309937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,float16,0,0.04605866471926371
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,16,1,128,1,float16,fp8,0,0.044639999667803444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,float16,0,0.05207466582457224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,16,2,128,1,float16,fp8,0,0.05524266759554545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,float16,0,0.09230400125185649
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,16,4,128,1,float16,fp8,0,0.084906667470932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,float16,0,0.09987200299898784
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,16,8,128,1,float16,fp8,0,0.0918933351834615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,float16,0,0.057402665416399636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,16,16,128,1,float16,fp8,0,0.05669866502285004
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,float16,0,0.029391999046007793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,16,1,128,1,float16,fp8,0,0.029322666426499683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,float16,0,0.033546666304270424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,16,2,128,1,float16,fp8,0,0.03340800106525421
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,fp8,0,0.047872001926104225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,16,4,128,1,float16,float16,0,0.04375466704368591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,fp8,0,0.049786667029062905
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,16,8,128,1,float16,float16,0,0.043706665436426796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,float16,0,0.027402666707833607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,16,16,128,1,float16,fp8,0,0.03345600018898646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,float16,0,0.019199999670187633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,float16,0,0.021221332252025604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,16,1,128,1,float16,fp8,0,0.019461333751678467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,16,2,128,1,float16,fp8,0,0.021125334004561108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,float16,0,0.027114666998386383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,16,4,128,1,float16,fp8,0,0.029296000798543293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,fp8,0,0.029504001140594482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,16,8,128,1,float16,float16,0,0.026709333062171936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,float16,0,0.019071999937295914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,16,16,128,1,float16,fp8,0,0.021333334346612293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,float16,0,0.014298666268587112
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,16,1,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,fp8,0,0.01618133361140887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,16,2,128,1,float16,float16,0,0.015418666104475657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,fp8,0,0.019381333142518997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,16,4,128,1,float16,float16,0,0.017221332838137943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,float16,0,0.018592000007629395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,16,8,128,1,float16,fp8,0,0.020213333268960316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,float16,0,0.014922666052977243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,16,16,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,16,1,128,1,float16,fp8,0,0.013157332936922709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,float16,0,0.014943999548753103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,16,2,128,1,float16,float16,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,16,4,128,1,float16,fp8,0,0.015173333386580149
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,float16,0,0.014560000350077948
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,16,8,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,16,16,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,16,1,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,float16,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,float16,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,16,2,128,1,float16,fp8,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,16,4,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,float16,0,0.01303999995191892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,16,8,128,1,float16,fp8,0,0.01331199953953425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,float16,0,0.013173333058754602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,float16,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,16,16,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,16,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,16,4,128,1,float16,fp8,0,0.013429333766301474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,16,2,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,float16,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,16,8,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,fp8,0,0.11528000235557556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,16,1,128,1,float16,float16,0,0.13272000352541605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,fp8,0,0.13555199901262918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,16,2,128,1,float16,float16,0,0.15083733201026917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,float16,0,0.23421865701675415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,16,4,128,1,float16,fp8,0,0.24010133743286133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,float16,0,0.21146667003631592
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,16,8,128,1,float16,fp8,0,0.2153173287709554
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,float16,0,0.12904533743858337
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,16,16,128,1,float16,fp8,0,0.14059733351071677
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,float16,0,0.06838933130105336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,16,1,128,1,float16,fp8,0,0.0626933326323827
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,float16,0,0.07548266649246216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,16,2,128,1,float16,fp8,0,0.07228800157705943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,float16,0,0.11158399780591328
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,16,4,128,1,float16,fp8,0,0.10470933715502422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,float16,0,0.06692266464233398
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,fp8,0,0.1129813293615977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,16,8,128,1,float16,float16,0,0.12333333492279053
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,16,16,128,1,float16,fp8,0,0.0680159976085027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,float16,0,0.03992533435424169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,16,1,128,1,float16,fp8,0,0.03557866563399633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,float16,0,0.043696001172065735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,16,2,128,1,float16,fp8,0,0.04164800047874451
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,float16,0,0.05401599903901418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,16,4,128,1,float16,fp8,0,0.05604266623655955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,fp8,0,0.058037335673967995
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,16,8,128,1,float16,float16,0,0.05426133175690969
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,float16,0,0.025568000972270966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,float16,0,0.033344000577926636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,16,16,128,1,float16,fp8,0,0.037647999823093414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,16,1,128,1,float16,fp8,0,0.024218666056791942
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,float16,0,0.027050666511058807
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,16,2,128,1,float16,fp8,0,0.025290665527184803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,float16,0,0.03156266609827677
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,16,4,128,1,float16,fp8,0,0.033674667278925575
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,fp8,0,0.03551999976237615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,16,8,128,1,float16,float16,0,0.03341866781314214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,fp8,0,0.023285334308942158
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,16,16,128,1,float16,float16,0,0.021344001094500225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,float16,0,0.01703466723362605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,float16,0,0.01921066641807556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,16,1,128,1,float16,fp8,0,0.017509333789348602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,16,2,128,1,float16,fp8,0,0.018901333212852478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,float16,0,0.02130666623512904
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,16,4,128,1,float16,fp8,0,0.021295999487241108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,fp8,0,0.023120000958442688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,16,8,128,1,float16,float16,0,0.021984001000722248
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,float16,0,0.015130666395028433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,16,16,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,float16,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,16,1,128,1,float16,fp8,0,0.013461332768201828
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,float16,0,0.012986666212479273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,16,2,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,float16,0,0.015018666783968607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,16,4,128,1,float16,fp8,0,0.015562667200962702
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,float16,0,0.015050667027632395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,16,8,128,1,float16,fp8,0,0.015247999380032221
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,float16,0,0.01321600005030632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,16,16,128,1,float16,fp8,0,0.013861333330472311
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,16,1,128,1,float16,float16,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,float16,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,16,2,128,1,float16,fp8,0,0.013162666310866674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,16,4,128,1,float16,float16,0,0.012794667234023413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,fp8,0,0.013530666629473368
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,16,8,128,1,float16,float16,0,0.013167999684810638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,float16,0,0.012533333152532578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,16,16,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,float16,0,0.012773333738247553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,16,1,128,1,float16,fp8,0,0.012773333738247553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,16,2,128,1,float16,float16,0,0.011871999750534693
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,float16,0,0.01309866706530253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,16,4,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,float16,0,0.012389333297808966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,16,8,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,float16,0,0.010981333752473196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,16,16,128,1,float16,float16,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,16,1,128,1,float16,fp8,0,0.010954666882753372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,16,2,128,1,float16,float16,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,float16,0,0.012784000486135483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,16,4,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,16,8,128,1,float16,float16,0,0.012826666235923767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,fp8,0,0.09742933511734009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,16,1,128,1,float16,float16,0,0.11611200372378032
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,fp8,0,0.10469866792360942
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,16,2,128,1,float16,float16,0,0.12866666913032532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,float16,0,0.17161067326863608
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,16,4,128,1,float16,fp8,0,0.1455519994099935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,fp8,0,0.1628213326136271
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,16,8,128,1,float16,float16,0,0.1681386629740397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,fp8,0,0.0876693328221639
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,16,16,128,1,float16,float16,0,0.09321066737174988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,float16,0,0.06308266520500183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,16,1,128,1,float16,fp8,0,0.054229333996772766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,float16,0,0.06640000144640605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,16,2,128,1,float16,fp8,0,0.058559998869895935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,fp8,0,0.0726986676454544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,16,4,128,1,float16,float16,0,0.07671999931335449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,float16,0,0.07934399942557017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,16,8,128,1,float16,fp8,0,0.07442133128643036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,float16,0,0.043791999419530235
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,16,16,128,1,float16,fp8,0,0.045941332976023354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,float16,0,0.03738133360942205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,16,1,128,1,float16,fp8,0,0.03327466547489166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,float16,0,0.03789333254098892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,16,2,128,1,float16,fp8,0,0.0354720006386439
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,float16,0,0.043840001026789345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,16,4,128,1,float16,fp8,0,0.041637333730856575
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,fp8,0,0.04381333291530609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,16,8,128,1,float16,float16,0,0.043578664461771645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,float16,0,0.027509334186712902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,float16,0,0.02350933353106181
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,16,16,128,1,float16,fp8,0,0.029093332588672638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,16,1,128,1,float16,fp8,0,0.021482666333516438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,float16,0,0.025087999800841015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,16,2,128,1,float16,fp8,0,0.023157333334287006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,float16,0,0.02733866622050603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,16,4,128,1,float16,fp8,0,0.027456000447273254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,float16,0,0.027119999130566914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,16,8,128,1,float16,fp8,0,0.027535999814669292
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,float16,0,0.017914666483799618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,float16,0,0.016917333006858826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,16,16,128,1,float16,fp8,0,0.019167999426523846
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,float16,0,0.017317333569129307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,16,1,128,1,float16,fp8,0,0.015317333241303762
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,16,2,128,1,float16,fp8,0,0.01534933348496755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,float16,0,0.02086399992307027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,16,4,128,1,float16,fp8,0,0.01749333366751671
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,float16,0,0.01746133342385292
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,16,8,128,1,float16,fp8,0,0.019098666807015736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,float16,0,0.013173333058754602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,float16,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,16,16,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,float16,0,0.01341333364446958
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,16,1,128,1,float16,fp8,0,0.013317332913478216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,16,2,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,16,4,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,16,8,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,float16,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,16,1,128,1,float16,float16,0,0.012831999609867731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,16,16,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,16,2,128,1,float16,float16,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,float16,0,0.01312000056107839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,16,4,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,float16,0,0.012768000364303589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,16,8,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,float16,0,0.012778667112191519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,16,1,128,1,float16,fp8,0,0.012794667234023413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,float16,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,16,2,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,16,16,128,1,float16,float16,0,0.011711999773979187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,16,4,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,fp8,0,0.011429333438475927
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,16,8,128,1,float16,float16,0,0.01257066677014033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,float16,0,0.01099733387430509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,16,1,128,1,float16,fp8,0,0.011989332735538483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,fp8,0,0.012389333297808966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,16,16,128,1,float16,float16,0,0.011557333171367645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,float16,0,0.011306667079528173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,16,2,128,1,float16,fp8,0,0.012784000486135483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,float16,0,0.011253333340088526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,16,4,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,float16,0,0.011231999844312668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,16,8,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,float16,0,0.11010133226712544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,16,1,128,1,float16,fp8,0,0.08901866277058919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,float16,0,0.11206400394439697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,16,2,128,1,float16,fp8,0,0.09501333038012187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,float16,0,0.12869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,16,4,128,1,float16,fp8,0,0.10946666200955708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,float16,0,0.13417067130406699
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,16,8,128,1,float16,fp8,0,0.11667199929555257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,fp8,0,0.06438399851322174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,16,16,128,1,float16,float16,0,0.07442133128643036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,float16,0,0.05996799965699514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,16,1,128,1,float16,fp8,0,0.04996799925963084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,float16,0,0.062314664324124656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,16,2,128,1,float16,fp8,0,0.05221333106358846
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,float16,0,0.06658133367697398
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,16,4,128,1,float16,fp8,0,0.060378665725390114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,fp8,0,0.037818667789300285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,float16,0,0.06843733290831248
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,16,16,128,1,float16,float16,0,0.03972800076007843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,16,8,128,1,float16,fp8,0,0.060362666845321655
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,float16,0,0.03596800069014231
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,float16,0,0.03698666642109553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,16,1,128,1,float16,fp8,0,0.032799998919169106
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,16,2,128,1,float16,fp8,0,0.031541332602500916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,fp8,0,0.036090667049090065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,16,4,128,1,float16,float16,0,0.03956266740957896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,fp8,0,0.035562666753927864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,16,8,128,1,float16,float16,0,0.03979733337958654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,float16,0,0.023423999547958374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,16,1,128,1,float16,fp8,0,0.021253332495689392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,float16,0,0.025461333493391674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,16,16,128,1,float16,fp8,0,0.025040000677108765
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,float16,0,0.023226665953795116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,16,2,128,1,float16,fp8,0,0.021066665649414062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,float16,0,0.025279998779296875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,16,4,128,1,float16,fp8,0,0.023103999594847362
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,float16,0,0.023418667415777843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,16,8,128,1,float16,fp8,0,0.023242667317390442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,float16,0,0.017312000195185345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,16,16,128,1,float16,fp8,0,0.016949333250522614
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,float16,0,0.01632000009218852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,16,1,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,float16,0,0.01714666684468587
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,16,2,128,1,float16,fp8,0,0.014943999548753103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,float16,0,0.01691199963291486
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,16,4,128,1,float16,fp8,0,0.01552533358335495
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,float16,0,0.01703466723362605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,16,8,128,1,float16,fp8,0,0.015770666301250458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,float16,0,0.013248000293970108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,16,1,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,16,16,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,float16,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,16,2,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,float16,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,16,4,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,float16,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,16,8,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,float16,0,0.012960000584522883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,float16,0,0.010954666882753372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,16,16,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,16,1,128,1,float16,fp8,0,0.011952000359694162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,float16,0,0.012800000607967377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,16,2,128,1,float16,fp8,0,0.012730666746695837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,16,4,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,float16,0,0.012858666479587555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,16,8,128,1,float16,fp8,0,0.012826666235923767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,16,16,128,1,float16,float16,0,0.011695999652147293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,float16,0,0.011183999478816986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,16,1,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,fp8,0,0.01259200026591619
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,16,2,128,1,float16,float16,0,0.011370666325092316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,float16,0,0.012768000364303589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,16,4,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,float16,0,0.011168000598748526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,16,8,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,float16,0,0.011029332876205444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,16,1,128,1,float16,fp8,0,0.011333333949247995
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,float16,0,0.011509332805871964
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,16,16,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,float16,0,0.011157333850860596
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,16,2,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,float16,0,0.011087999989589056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,fp8,0,0.01240533341964086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,16,4,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,16,8,128,1,float16,float16,0,0.011018666128317514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,0,0.013183999806642532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,0,0.01403733342885971
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,0,0.015210667004187902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,0,0.01926400015751521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,0,0.017322666943073273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,0,0.02739199995994568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,float16,0,0.0273333340883255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,16,16,128,1,float16,fp8,0,0.02515733242034912
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,0,0.02533866713444392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,0,0.01098666712641716
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,0,0.012768000364303589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,0,0.015072000523408255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,0,0.013253333667914072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,0,0.01929066702723503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,float16,0,0.019082666685183842
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,0,0.01710933322707812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,16,16,128,1,float16,fp8,0,0.017210666090250015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,0,0.010714666297038397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,0,0.010784000158309937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,0,0.011007999380429586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,0,0.012746666868527731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,0,0.011829332758982977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,0,0.014938666174809137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,float16,0,0.014831999937693277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,0,0.011109333485364914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,16,16,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,0,0.010933333386977514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,0,0.010266666611035665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,0,0.01119999960064888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,0,0.010863999525705973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,0,0.011557333171367645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,0,0.010853332777818045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,0,0.01109333336353302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,fp8,0,0.012437333663304647
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,16,16,128,1,float16,float16,0,0.011792000383138657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,0,0.010885333021481832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,0,0.010885333021481832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,0,0.0107893335322539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,0,0.009877333417534828
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,0,0.010933333386977514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,0,0.01062400018175443
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,0,0.00960533320903778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,float16,0,0.010821333775917688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,0,0.010666667173306147
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,16,16,128,1,float16,fp8,0,0.010890666395425797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,0,0.008933333059151968
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,0,0.010682666053374609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,0,0.011061333119869232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,0,0.009984000275532404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,0,0.010773333410422007
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,0,0.010816000401973724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,float16,0,0.009338666374484697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,0,0.01080000028014183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,16,16,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,0,0.010928000013033548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,0,0.010693332801262537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,0,0.010570666442314783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,0,0.008805333326260248
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,0,0.010768000036478043
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,0,0.010634666929642359
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,0,0.009162666896979014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,0,0.009786666681369146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,float16,0,0.010762666662534079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,16,16,128,1,float16,fp8,0,0.010703999549150467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,0,0.0100853331387043
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,0,0.009248000259200731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,0,0.010687999427318573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,0,0.009733333562811216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,0,0.010746666540702185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,float16,0,0.010618666807810465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,0,0.008901333436369896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,16,16,128,1,float16,fp8,0,0.010570666442314783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,0,0.010693332801262537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,0,0.009088000282645226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,0,0.010629333555698395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,0,0.010368000095089277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,0,0.010175999874869982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,0,0.00980266680320104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,float16,0,4.60914675394694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,12,1,128,1,float16,fp8,0,4.058202743530273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,float16,0,4.7265974680582685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,12,2,128,1,float16,fp8,0,4.167162577311198
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,fp8,0,4.984378814697266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,float16,0,2.979994773864746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,12,12,128,1,float16,fp8,0,2.641775925954183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,12,4,128,1,float16,float16,0,6.447823842366536
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,float16,0,2.3971360524495444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,12,1,128,1,float16,fp8,0,2.09115203221639
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,float16,0,2.521893342336019
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,12,2,128,1,float16,fp8,0,2.1752427419026694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,float16,0,1.6168106396993
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,fp8,0,2.539050738016764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,12,4,128,1,float16,float16,0,2.895658810933431
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,12,12,128,1,float16,fp8,0,1.3880480130513508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,fp8,0,1.121461311976115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,12,1,128,1,float16,float16,0,1.3391307195027669
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,fp8,0,1.1572693188985188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,12,2,128,1,float16,float16,0,1.3375253677368164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,float16,0,1.4648747444152832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,fp8,0,0.7658613522847494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,12,12,128,1,float16,float16,0,0.8251679738362631
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,12,4,128,1,float16,fp8,0,1.302613337834676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,float16,0,0.725536028544108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,12,1,128,1,float16,fp8,0,0.6266719897588094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,float16,0,0.7470080057779948
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,12,2,128,1,float16,fp8,0,0.6387093464533488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,fp8,0,0.7518080075581869
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,12,4,128,1,float16,float16,0,0.8158506552378336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,fp8,0,2.3746347427368164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,12,1,128,1,float16,float16,0,2.712554613749186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,fp8,0,2.5140746434529624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,12,2,128,1,float16,float16,0,2.8163839975992837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,float16,0,1.7581334114074707
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,float16,0,3.3695198694864907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,12,4,128,1,float16,fp8,0,3.4099413553873696
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,12,12,128,1,float16,fp8,0,2.0589760144551597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,float16,0,1.4369333585103352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,fp8,0,1.3168266614278157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,12,2,128,1,float16,float16,0,1.4767786661783855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,12,1,128,1,float16,fp8,0,1.5926666259765625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,float16,0,0.9420639673868815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,12,12,128,1,float16,fp8,0,0.8895306587219238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,fp8,0,1.6205333073933919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,12,4,128,1,float16,float16,0,1.726469357808431
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,fp8,0,0.6890827020009359
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,12,1,128,1,float16,float16,0,0.790175994237264
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,float16,0,0.8192799886067709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,12,2,128,1,float16,fp8,0,0.7641013463338217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,float16,0,0.5299946864446005
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,fp8,0,0.8479999701182047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,12,4,128,1,float16,float16,0,0.9335253238677979
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,12,12,128,1,float16,fp8,0,0.532975991566976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,fp8,0,0.39487465222676593
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,float16,0,0.4617120027542114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,12,1,128,1,float16,float16,0,0.4511679808298747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,12,2,128,1,float16,fp8,0,0.40563201904296875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,float16,0,0.5178026755650839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,12,4,128,1,float16,fp8,0,0.45153601964314777
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,fp8,0,1.731023947397868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,12,1,128,1,float16,float16,0,1.964245319366455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,fp8,0,1.8166613578796387
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,12,2,128,1,float16,float16,0,2.037893295288086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,float16,0,2.4312267303466797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,12,4,128,1,float16,fp8,0,2.1559200286865234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,float16,0,1.3318133354187012
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,float16,0,1.047925313313802
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,12,1,128,1,float16,fp8,0,0.9111039638519287
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,12,12,128,1,float16,fp8,0,1.5190827051798503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,float16,0,1.0864799817403157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,12,2,128,1,float16,fp8,0,0.9606346289316813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,float16,0,1.2695573170979817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,12,4,128,1,float16,fp8,0,1.162719964981079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,float16,0,0.714629332224528
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,12,12,128,1,float16,fp8,0,0.6804373264312744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,float16,0,0.5850720008214315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,12,1,128,1,float16,fp8,0,0.5044746796290079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,float16,0,0.6009226640065511
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,12,2,128,1,float16,fp8,0,0.5383679866790771
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,float16,0,0.7013813654581705
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,12,4,128,1,float16,fp8,0,0.6424160003662109
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,float16,0,0.4045013189315796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,12,12,128,1,float16,fp8,0,0.38258135318756104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,float16,0,0.3365120093027751
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,12,1,128,1,float16,fp8,0,0.2983146707216899
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,float16,0,0.3429386615753174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,12,2,128,1,float16,fp8,0,0.30993600686391193
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,fp8,0,0.3456960121790568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,12,4,128,1,float16,float16,0,0.38786133130391437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,fp8,0,2.228389263153076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,12,1,128,1,float16,float16,0,2.6306079228719077
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,float16,0,2.6610026359558105
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,12,2,128,1,float16,fp8,0,2.4003893534342446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,float16,0,1.773530642191569
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,12,12,128,1,float16,fp8,0,1.7251359621683757
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,float16,0,3.416378657023112
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,12,4,128,1,float16,fp8,0,3.24996280670166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,float16,0,1.3244640032450359
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,12,1,128,1,float16,fp8,0,1.1660266717274983
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,fp8,0,1.2511893113454182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,12,2,128,1,float16,float16,0,1.4564107259114583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,float16,0,1.7624533971150715
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,12,4,128,1,float16,fp8,0,1.5045013427734375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,float16,0,0.9343573252360026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,12,12,128,1,float16,fp8,0,0.9048960208892822
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,float16,0,0.7226080099741617
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,12,1,128,1,float16,fp8,0,0.6286186774571737
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,fp8,0,0.6685012976328532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,12,2,128,1,float16,float16,0,0.758570671081543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,float16,0,0.8956267038981119
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,12,4,128,1,float16,fp8,0,0.8057066599527994
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,fp8,0,0.4936639865239461
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,12,12,128,1,float16,float16,0,0.5105813344319662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,float16,0,0.40535998344421387
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,12,1,128,1,float16,fp8,0,0.34945066769917804
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,float16,0,0.42054398854573566
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,12,2,128,1,float16,fp8,0,0.37112001578013104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,float16,0,0.4938400189081828
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,12,4,128,1,float16,fp8,0,0.45267200469970703
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,fp8,0,0.27937066555023193
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,12,12,128,1,float16,float16,0,0.2940693298975627
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,float16,0,0.23747734228769937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,12,1,128,1,float16,fp8,0,0.21124800046284994
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,float16,0,0.24313600858052573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,12,2,128,1,float16,fp8,0,0.2239840030670166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,float16,0,0.2653226653734843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,12,4,128,1,float16,fp8,0,0.2507999936739604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,float16,0,1.5647519429524739
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,12,1,128,1,float16,fp8,0,1.3676533699035645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,float16,0,1.6214879353841145
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,12,2,128,1,float16,fp8,0,1.4777599970499675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,float16,0,1.138159990310669
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,fp8,0,1.8680213292439778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,12,12,128,1,float16,fp8,0,1.1365546385447185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,12,4,128,1,float16,float16,0,2.1963626543680825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,float16,0,0.8124799728393555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,12,1,128,1,float16,fp8,0,0.7238826751708984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,float16,0,0.8577386538187662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,12,2,128,1,float16,fp8,0,0.7790239651997884
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,fp8,0,0.9859466552734375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,fp8,0,0.6069440046946207
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,12,4,128,1,float16,float16,0,1.1050933202107747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,12,12,128,1,float16,float16,0,0.6059360106786092
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,float16,0,0.45079465707143146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,12,1,128,1,float16,fp8,0,0.399397333463033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,float16,0,0.47139732042948407
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,12,2,128,1,float16,fp8,0,0.42242133617401123
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,fp8,0,0.5392693281173706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,12,4,128,1,float16,float16,0,0.5886293252309164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,fp8,0,0.33186666170756024
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,12,12,128,1,float16,float16,0,0.3361599842707316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,float16,0,0.25121599435806274
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,12,1,128,1,float16,fp8,0,0.22794665892918906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,float16,0,0.26080532868703205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,12,2,128,1,float16,fp8,0,0.24029332399368286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,float16,0,0.32850666840871173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,12,4,128,1,float16,fp8,0,0.3036106626192729
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,float16,0,0.19896533091862997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,12,12,128,1,float16,fp8,0,0.18545599778493246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,float16,0,0.15775466958681741
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,12,1,128,1,float16,fp8,0,0.14381333192189535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,float16,0,0.16184000174204508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,12,2,128,1,float16,fp8,0,0.1507253348827362
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,float16,0,0.18031466007232666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,12,4,128,1,float16,fp8,0,0.1752693255742391
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,float16,0,1.47216002146403
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,12,1,128,1,float16,fp8,0,1.3279893398284912
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,float16,0,1.6102933883666992
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,12,2,128,1,float16,fp8,0,1.4933706919352214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,fp8,0,1.9451200167338054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,12,4,128,1,float16,float16,0,2.305642604827881
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,float16,0,1.217898686726888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,float16,0,0.781925360361735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,12,12,128,1,float16,fp8,0,1.257749319076538
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,12,1,128,1,float16,fp8,0,0.6951573689778646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,float16,0,0.8504266738891602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,12,2,128,1,float16,fp8,0,0.7836266358693441
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,float16,0,1.1372000376383464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,float16,0,0.6437066793441772
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,12,4,128,1,float16,fp8,0,1.1456373532613118
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,12,12,128,1,float16,fp8,0,0.6548106670379639
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,float16,0,0.4198826551437378
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,12,1,128,1,float16,fp8,0,0.3794613281885783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,float16,0,0.46430401007334393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,12,2,128,1,float16,fp8,0,0.4237653414408366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,float16,0,0.6015146573384603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,12,4,128,1,float16,fp8,0,0.5661493142445883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,fp8,0,0.3511999845504761
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,float16,0,0.23305066426595053
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,12,12,128,1,float16,float16,0,0.3442026774088542
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,12,1,128,1,float16,fp8,0,0.20909333229064941
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,float16,0,0.2534293333689372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,12,2,128,1,float16,fp8,0,0.22512533267339072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,float16,0,0.3288639982541402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,12,4,128,1,float16,fp8,0,0.3136853377024333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,fp8,0,0.19476799170176187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,12,12,128,1,float16,float16,0,0.19538132349650064
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,fp8,0,0.12431466579437256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,12,1,128,1,float16,float16,0,0.13613333304723105
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,float16,0,0.14194666345914206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,12,2,128,1,float16,fp8,0,0.13613866766293845
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,float16,0,0.16990933815638223
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,12,4,128,1,float16,fp8,0,0.16572800278663635
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,fp8,0,0.08076266447703044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,12,1,128,1,float16,float16,0,0.08708799878756206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,float16,0,0.1035040020942688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,12,12,128,1,float16,fp8,0,0.10671466588973999
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,float16,0,0.09142933289210002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,12,2,128,1,float16,fp8,0,0.08474133412043254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,float16,0,0.1006773312886556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,12,4,128,1,float16,fp8,0,0.10139733552932739
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,float16,0,1.0192426840464275
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,12,2,128,1,float16,fp8,0,0.9695573647816976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,fp8,0,0.836026668548584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,12,1,128,1,float16,float16,0,0.9226986567179362
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,float16,0,0.829637368520101
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,12,12,128,1,float16,fp8,0,0.8610826333363851
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,float16,0,1.50763734181722
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,12,4,128,1,float16,fp8,0,1.5442186991373699
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,float16,0,0.5392640034357706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,12,2,128,1,float16,fp8,0,0.5048906803131104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,fp8,0,0.44598933060963947
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,12,1,128,1,float16,float16,0,0.4997653166453044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,float16,0,0.77020263671875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,12,4,128,1,float16,fp8,0,0.8143786589304606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,fp8,0,0.4526880184809367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,12,12,128,1,float16,float16,0,0.43304534753163654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,float16,0,0.2713386615117391
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,12,1,128,1,float16,fp8,0,0.24672534068425497
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,float16,0,0.2948853373527527
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,12,2,128,1,float16,fp8,0,0.275002658367157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,float16,0,0.4124159812927246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,12,4,128,1,float16,fp8,0,0.3883306582768758
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,fp8,0,0.2437173326810201
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,12,12,128,1,float16,float16,0,0.236735999584198
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,float16,0,0.1461013356844584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,12,1,128,1,float16,fp8,0,0.13793067137400308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,float16,0,0.16030399998029074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,12,2,128,1,float16,fp8,0,0.14825066924095154
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,float16,0,0.22524267435073853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,12,4,128,1,float16,fp8,0,0.19676266113917032
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,float16,0,0.09186133742332458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,12,1,128,1,float16,fp8,0,0.08659199873606364
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,float16,0,0.13596266508102417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,12,12,128,1,float16,fp8,0,0.12873599926630655
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,float16,0,0.09728533029556274
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,12,2,128,1,float16,fp8,0,0.0940106709798177
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,float16,0,0.11416533589363098
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,12,4,128,1,float16,fp8,0,0.11663466691970825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,float16,0,0.06635733445485432
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,float16,0,0.055813332398732506
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,12,1,128,1,float16,fp8,0,0.053360000252723694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,12,12,128,1,float16,fp8,0,0.07134933272997539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,float16,0,0.0574239989121755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,12,2,128,1,float16,fp8,0,0.05619200070699056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,fp8,0,0.06836799780527751
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,12,4,128,1,float16,float16,0,0.06604266663392384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,float16,0,0.9405492941538492
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,12,1,128,1,float16,fp8,0,0.870533307393392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,float16,0,1.0719093481699626
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,12,2,128,1,float16,fp8,0,1.0239626566569011
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,float16,0,1.7028533617655437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,fp8,0,1.0134027004241943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,12,12,128,1,float16,float16,0,0.9387946923573812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,12,4,128,1,float16,fp8,0,1.765109380086263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,float16,0,0.4996639887491862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,12,1,128,1,float16,fp8,0,0.4590880076090495
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,float16,0,0.5685973167419434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,12,2,128,1,float16,fp8,0,0.5309866666793823
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,float16,0,0.8661759694417318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,12,4,128,1,float16,fp8,0,0.9321333567301432
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,float16,0,0.4880053202311198
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,float16,0,0.27401600281397503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,12,12,128,1,float16,fp8,0,0.5273173252741495
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,float16,0,0.3083680073420207
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,12,1,128,1,float16,fp8,0,0.24797866741816202
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,12,2,128,1,float16,fp8,0,0.2914346655209859
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,fp8,0,0.4191733201344808
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,12,4,128,1,float16,float16,0,0.4504106839497884
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,float16,0,0.2603200078010559
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,float16,0,0.14696533481280008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,12,12,128,1,float16,fp8,0,0.27844266096750897
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,12,1,128,1,float16,fp8,0,0.13339199622472128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,float16,0,0.1731040080388387
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,12,2,128,1,float16,fp8,0,0.15094932913780212
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,float16,0,0.251311997572581
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,12,4,128,1,float16,fp8,0,0.2304853399594625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,float16,0,0.08523733417193095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,float16,0,0.14502400159835815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,12,12,128,1,float16,fp8,0,0.15002133448918661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,12,1,128,1,float16,fp8,0,0.08192533254623413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,float16,0,0.09259733557701111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,fp8,0,0.12271466851234436
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,12,4,128,1,float16,float16,0,0.11782933274904887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,12,2,128,1,float16,fp8,0,0.09222400188446045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,float16,0,0.05596266686916351
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,float16,0,0.07408000032107036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,12,1,128,1,float16,fp8,0,0.05439466734727224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,12,12,128,1,float16,fp8,0,0.07998399933179219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,fp8,0,0.06044800082842509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,12,2,128,1,float16,float16,0,0.06018133461475372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,fp8,0,0.07500266532103221
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,12,4,128,1,float16,float16,0,0.07101866602897644
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,float16,0,0.039850667119026184
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,12,1,128,1,float16,fp8,0,0.04005333284536997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,float16,0,0.04775466521581014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,12,12,128,1,float16,fp8,0,0.052095999320348106
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,float16,0,0.04194133480389913
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,12,2,128,1,float16,fp8,0,0.041797334949175514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,float16,0,0.04790399968624115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,12,4,128,1,float16,fp8,0,0.049829334020614624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,float16,0,0.6163146495819092
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,12,1,128,1,float16,fp8,0,0.571173350016276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,fp8,0,0.6922453244527181
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,12,2,128,1,float16,float16,0,0.7322026888529459
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,float16,0,1.2138346831003826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,12,4,128,1,float16,fp8,0,1.0683733622233074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,float16,0,0.6588533322016398
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,12,12,128,1,float16,fp8,0,0.7246399720509847
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,float16,0,0.3300960063934326
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,12,1,128,1,float16,fp8,0,0.3035306731859843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,float16,0,0.37985066572825116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,12,2,128,1,float16,fp8,0,0.3641973336537679
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,float16,0,0.6239626804987589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,12,4,128,1,float16,fp8,0,0.5945013364156088
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,float16,0,0.34216535091400146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,float16,0,0.17864000797271729
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,12,1,128,1,float16,fp8,0,0.16420267025629678
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,12,12,128,1,float16,fp8,0,0.3757173220316569
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,fp8,0,0.20316267013549805
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,12,2,128,1,float16,float16,0,0.21081066131591797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,float16,0,0.3177173336346944
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,12,4,128,1,float16,fp8,0,0.3118186593055725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,float16,0,0.09616000453631084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,12,1,128,1,float16,fp8,0,0.09378666679064433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,float16,0,0.18549867471059164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,12,12,128,1,float16,fp8,0,0.20100265741348267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,float16,0,0.10859733819961548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,12,2,128,1,float16,fp8,0,0.10520533720652263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,float16,0,0.17566933234532675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,12,4,128,1,float16,fp8,0,0.15996799866358438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,fp8,0,0.09776533643404643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,12,12,128,1,float16,float16,0,0.1030560036500295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,fp8,0,0.05770133435726166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,12,1,128,1,float16,float16,0,0.05961066484451294
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,float16,0,0.06542933483918507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,float16,0,0.0815413345893224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,12,2,128,1,float16,fp8,0,0.06474133332570393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,12,4,128,1,float16,fp8,0,0.08843732873598735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,float16,0,0.03745600084463755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,12,1,128,1,float16,fp8,0,0.03709333389997482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,fp8,0,0.0543093333641688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,12,12,128,1,float16,float16,0,0.04804266492525736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,float16,0,0.039605334401130676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,12,2,128,1,float16,fp8,0,0.039808000127474465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,fp8,0,0.05188799897829691
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,12,4,128,1,float16,float16,0,0.04684266448020935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,fp8,0,0.04190933207670847
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,12,12,128,1,float16,float16,0,0.03878933439652125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,fp8,0,0.03333866596221924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,12,1,128,1,float16,float16,0,0.03364266703526179
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,float16,0,0.035375999907652535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,float16,0,0.037776000797748566
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,12,4,128,1,float16,fp8,0,0.04132800052563349
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,12,2,128,1,float16,fp8,0,0.03547733277082443
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,float16,0,0.6770933469136556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,12,1,128,1,float16,fp8,0,0.6292746861775717
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,float16,0,0.8122239907582601
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,12,2,128,1,float16,fp8,0,0.790287971496582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,float16,0,1.483301321665446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,12,4,128,1,float16,fp8,0,1.5725760459899902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,float16,0,0.7965013186136881
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,float16,0,0.3540159861246745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,12,12,128,1,float16,fp8,0,0.8921973705291748
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,12,1,128,1,float16,fp8,0,0.3330133358637492
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,float16,0,0.4264853398005168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,12,2,128,1,float16,fp8,0,0.4151573181152344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,float16,0,0.4124853213628133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,float16,0,0.7575786908467611
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,12,12,128,1,float16,fp8,0,0.45892266432444256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,12,4,128,1,float16,fp8,0,0.8097279866536459
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,float16,0,0.19581866264343262
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,float16,0,0.23280000686645508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,12,1,128,1,float16,fp8,0,0.1773279905319214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,12,2,128,1,float16,fp8,0,0.22445867458979288
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,float16,0,0.21663999557495117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,float16,0,0.3665013313293457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,12,4,128,1,float16,fp8,0,0.35500800609588623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,float16,0,0.10409067074457805
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,12,1,128,1,float16,fp8,0,0.09464533130327861
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,float16,0,0.12947733203570047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,12,12,128,1,float16,fp8,0,0.23973333835601807
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,12,2,128,1,float16,fp8,0,0.11594133575757344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,fp8,0,0.22183465957641602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,12,4,128,1,float16,float16,0,0.20839466651280722
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,float16,0,0.11764267086982727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,12,12,128,1,float16,fp8,0,0.12331733107566833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,float16,0,0.06018666426340739
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,12,1,128,1,float16,fp8,0,0.058464000622431435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,float16,0,0.06578133503595988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,12,2,128,1,float16,fp8,0,0.06869333485762279
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,float16,0,0.09099733829498291
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,12,4,128,1,float16,fp8,0,0.10073600212732951
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,float16,0,0.05586666862169901
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,float16,0,0.04068266600370407
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,12,1,128,1,float16,fp8,0,0.0395413339138031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,12,12,128,1,float16,fp8,0,0.06445866823196411
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,float16,0,0.04362666606903076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,12,2,128,1,float16,fp8,0,0.04438933233420054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,float16,0,0.05426133175690969
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,12,4,128,1,float16,fp8,0,0.0602400004863739
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,float16,0,0.029205332199732464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,float16,0,0.03565866748491923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,12,1,128,1,float16,fp8,0,0.02920000006755193
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,12,12,128,1,float16,fp8,0,0.039893334110577904
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,float16,0,0.02956799914439519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,12,2,128,1,float16,fp8,0,0.029978667696317036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,fp8,0,0.03749333322048187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,12,4,128,1,float16,float16,0,0.03545066714286804
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,fp8,0,0.02717333287000656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,12,1,128,1,float16,float16,0,0.027210667729377747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,fp8,0,0.03342933456103007
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,12,12,128,1,float16,float16,0,0.031136001149813335
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,float16,0,0.027642667293548584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,12,2,128,1,float16,fp8,0,0.027306665976842243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,float16,0,0.030005333324273426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,12,4,128,1,float16,fp8,0,0.03156800071398417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,fp8,0,0.5092426538467407
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,12,1,128,1,float16,float16,0,0.5389546553293864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,float16,0,0.6698773701985677
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,12,2,128,1,float16,fp8,0,0.6703999837239584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,float16,0,0.7213119665781657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,12,12,128,1,float16,fp8,0,0.8290293216705322
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,float16,0,1.3495573997497559
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,12,4,128,1,float16,fp8,0,1.1238719622294109
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,float16,0,0.284986674785614
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,12,1,128,1,float16,fp8,0,0.26554133494695026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,float16,0,0.35493866602579754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,12,2,128,1,float16,fp8,0,0.35248533884684247
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,float16,0,0.6869493325551351
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,12,4,128,1,float16,fp8,0,0.6067999998728434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,float16,0,0.37333865960439044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,float16,0,0.15025599797566733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,12,12,128,1,float16,fp8,0,0.42499200503031415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,12,1,128,1,float16,fp8,0,0.147599995136261
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,float16,0,0.19789334138234457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,12,2,128,1,float16,fp8,0,0.1901599963506063
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,float16,0,0.32330665985743207
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,12,4,128,1,float16,fp8,0,0.31801066795984906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,fp8,0,0.22161600987116495
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,12,12,128,1,float16,float16,0,0.19604800144831339
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,float16,0,0.08298666775226593
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,12,1,128,1,float16,fp8,0,0.07509333391984303
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,float16,0,0.10719999670982361
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,12,2,128,1,float16,fp8,0,0.09673600395520528
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,float16,0,0.17132800817489624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,12,4,128,1,float16,fp8,0,0.20431999365488687
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,float16,0,0.047877331574757896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,fp8,0,0.11716266473134358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,12,12,128,1,float16,float16,0,0.10548800230026245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,12,1,128,1,float16,fp8,0,0.04858666658401489
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,float16,0,0.05442133545875549
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,12,2,128,1,float16,fp8,0,0.05773333211739858
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,float16,0,0.0781173308690389
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,12,4,128,1,float16,fp8,0,0.08929600318272908
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,fp8,0,0.05704533557097117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,float16,0,0.03364799916744232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,12,12,128,1,float16,float16,0,0.049957334995269775
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,float16,0,0.0363520011305809
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,12,2,128,1,float16,fp8,0,0.037589333951473236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,12,1,128,1,float16,fp8,0,0.031557333966096245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,float16,0,0.04738133152325948
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,12,4,128,1,float16,fp8,0,0.05246399839719137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,float16,0,0.02942399928967158
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,float16,0,0.023141334454218548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,12,12,128,1,float16,fp8,0,0.03579733272393545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,12,1,128,1,float16,fp8,0,0.02334933231274287
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,float16,0,0.02359466751416524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,12,2,128,1,float16,fp8,0,0.025568000972270966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,float16,0,0.02958400050799052
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,12,4,128,1,float16,fp8,0,0.03239466746648153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,float16,0,0.021327999730904896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,fp8,0,0.027119999130566914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,12,12,128,1,float16,float16,0,0.025306666890780132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,12,1,128,1,float16,fp8,0,0.02102400114138921
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,float16,0,0.021536000072956085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,12,2,128,1,float16,fp8,0,0.023215999205907185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,float16,0,0.02515733242034912
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,12,4,128,1,float16,fp8,0,0.025648000339667004
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,fp8,0,0.023152001202106476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,12,12,128,1,float16,float16,0,0.02144533395767212
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,float16,0,0.01947733387351036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,12,1,128,1,float16,fp8,0,0.021136000752449036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,fp8,0,0.021210665504137676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,12,2,128,1,float16,float16,0,0.021312000850836437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,float16,0,0.023168000082174938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,12,4,128,1,float16,fp8,0,0.023205332458019257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,float16,0,0.24150933821996054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,12,1,128,1,float16,fp8,0,0.23725332816441855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,float16,0,0.3043839931488037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,12,2,128,1,float16,fp8,0,0.3111199935277303
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,float16,0,0.3494453430175781
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,12,12,128,1,float16,fp8,0,0.40675199031829834
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,float16,0,0.6276373465855917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,12,4,128,1,float16,fp8,0,0.6505386829376221
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,float16,0,0.13436800241470337
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,12,1,128,1,float16,fp8,0,0.12970667084058127
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,float16,0,0.17014400164286295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,12,2,128,1,float16,fp8,0,0.16847999890645346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,float16,0,0.3221813241640727
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,fp8,0,0.21256534258524576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,12,4,128,1,float16,fp8,0,0.3267680009206136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,12,12,128,1,float16,float16,0,0.18442134062449136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,float16,0,0.07197333375612895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,12,1,128,1,float16,fp8,0,0.06798399984836578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,fp8,0,0.08701866865158081
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,12,2,128,1,float16,float16,0,0.09708799918492635
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,float16,0,0.16766399145126343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,float16,0,0.04030933231115341
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,fp8,0,0.11115200320879619
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,12,4,128,1,float16,fp8,0,0.16639467080434164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,12,12,128,1,float16,float16,0,0.09730666875839233
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,12,1,128,1,float16,fp8,0,0.04144533226887385
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,float16,0,0.047194664676984154
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,12,2,128,1,float16,fp8,0,0.05123733480771383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,float16,0,0.07016000151634216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,12,4,128,1,float16,fp8,0,0.08261866867542267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,float16,0,0.028618666032950085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,fp8,0,0.05414933462937673
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,12,12,128,1,float16,float16,0,0.04529599845409393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,float16,0,0.03215999901294708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,12,1,128,1,float16,fp8,0,0.02938133229811986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,12,2,128,1,float16,fp8,0,0.03503466645876566
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,float16,0,0.04358933369318644
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,12,4,128,1,float16,fp8,0,0.049914668003718056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,float16,0,0.02738133321205775
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,12,12,128,1,float16,fp8,0,0.0315733328461647
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,float16,0,0.019248000035683315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,12,1,128,1,float16,fp8,0,0.020213333268960316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,float16,0,0.021295999487241108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,12,2,128,1,float16,fp8,0,0.023120000958442688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,float16,0,0.027471999327341717
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,12,4,128,1,float16,fp8,0,0.029418667157491047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,fp8,0,0.02436800052722295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,float16,0,0.018960000326236088
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,12,1,128,1,float16,fp8,0,0.01725333308180173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,12,12,128,1,float16,float16,0,0.021530665457248688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,float16,0,0.01924266666173935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,12,2,128,1,float16,fp8,0,0.019482667247454327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,float16,0,0.02128000060717265
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,12,4,128,1,float16,fp8,0,0.023141334454218548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,float16,0,0.01729600007335345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,12,1,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,fp8,0,0.021013334393501282
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,12,12,128,1,float16,float16,0,0.019381333142518997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,float16,0,0.01708799973130226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,12,2,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,fp8,0,0.01941866676012675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,12,4,128,1,float16,float16,0,0.019226666539907455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,float16,0,0.017407999684413273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,12,1,128,1,float16,fp8,0,0.01721599946419398
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,float16,0,0.017077332983414333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,12,12,128,1,float16,fp8,0,0.017221332838137943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,float16,0,0.01740266631046931
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,12,2,128,1,float16,fp8,0,0.017338667064905167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,float16,0,0.01699200024207433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,12,4,128,1,float16,fp8,0,0.017829333742459614
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,float16,0,0.14708266655604044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,12,1,128,1,float16,fp8,0,0.1378986636797587
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,float16,0,0.17637866735458374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,12,2,128,1,float16,fp8,0,0.1840533415476481
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,float16,0,0.3173866669336955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,12,4,128,1,float16,fp8,0,0.3315466642379761
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,float16,0,0.0775679995616277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,fp8,0,0.21633599201838175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,12,1,128,1,float16,fp8,0,0.0722453345855077
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,12,12,128,1,float16,float16,0,0.18764267365137735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,float16,0,0.10154666503270467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,12,2,128,1,float16,fp8,0,0.09820800026257832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,float16,0,0.17935999234517416
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,float16,0,0.10008533795674641
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,float16,0,0.043680002291997276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,12,12,128,1,float16,fp8,0,0.11362666885058086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,12,4,128,1,float16,fp8,0,0.19234132766723633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,float16,0,0.05005866785844167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,12,1,128,1,float16,fp8,0,0.043653334180514015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,12,2,128,1,float16,fp8,0,0.05416533350944519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,float16,0,0.0734506646792094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,float16,0,0.027280000348885853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,fp8,0,0.05187733471393585
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,12,12,128,1,float16,float16,0,0.04367466767628988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,12,4,128,1,float16,fp8,0,0.08482666810353597
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,12,1,128,1,float16,fp8,0,0.0272533322374026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,float16,0,0.030986666679382324
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,12,2,128,1,float16,fp8,0,0.03323200096686681
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,float16,0,0.04165866722663244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,12,4,128,1,float16,fp8,0,0.047925333182017006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,float16,0,0.020960000654061634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,fp8,0,0.03355200091997782
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,12,12,128,1,float16,float16,0,0.027600000301996868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,12,1,128,1,float16,fp8,0,0.021087999145189922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,float16,0,0.0227360005180041
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,12,2,128,1,float16,fp8,0,0.023157333334287006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,float16,0,0.027482666075229645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,12,4,128,1,float16,fp8,0,0.03141866624355316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,float16,0,0.01932266727089882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,fp8,0,0.014943999548753103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,12,1,128,1,float16,float16,0,0.01492799942692121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,12,12,128,1,float16,fp8,0,0.02120000123977661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,float16,0,0.016917333006858826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,12,2,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,fp8,0,0.019440000255902607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,12,4,128,1,float16,float16,0,0.019306667149066925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,float16,0,0.015295999745527903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,float16,0,0.013050666699806849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,12,1,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,12,12,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,float16,0,0.014202666779359182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,12,2,128,1,float16,fp8,0,0.01321600005030632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,fp8,0,0.016842667013406754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,12,4,128,1,float16,float16,0,0.015360000232855478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,float16,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,12,1,128,1,float16,fp8,0,0.014762666076421738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,fp8,0,0.014869333555301031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,12,12,128,1,float16,float16,0,0.013258667041858038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,12,2,128,1,float16,fp8,0,0.014975999792416891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,fp8,0,0.014181333283583323
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,12,4,128,1,float16,float16,0,0.013056000073750814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,float16,0,0.013114667187134424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,fp8,0,0.01498666654030482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,12,12,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,12,1,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,float16,0,0.013173333058754602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,12,2,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,12,4,128,1,float16,float16,0,0.012960000584522883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,float16,0,0.10742933551470439
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,12,1,128,1,float16,fp8,0,0.09499200185139973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,float16,0,0.12526399890581766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,12,2,128,1,float16,fp8,0,0.12027200063069661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,float16,0,0.11526399850845337
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,12,12,128,1,float16,fp8,0,0.12396799524625142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,float16,0,0.2093600034713745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,12,4,128,1,float16,fp8,0,0.22378132740656534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,float16,0,0.056703999638557434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,12,1,128,1,float16,fp8,0,0.053898667295773826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,float16,0,0.06270933151245117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,12,2,128,1,float16,fp8,0,0.06469333171844482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,float16,0,0.09142399827639262
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,12,4,128,1,float16,fp8,0,0.09514133135477702
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,float16,0,0.05205333232879639
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,12,12,128,1,float16,fp8,0,0.05808533231417338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,float16,0,0.033514666060606636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,12,1,128,1,float16,fp8,0,0.03350399931271871
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,float16,0,0.03772799919048945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,12,2,128,1,float16,fp8,0,0.039493332306543984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,float16,0,0.04772266745567322
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,12,4,128,1,float16,fp8,0,0.05390933156013489
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,float16,0,0.030405332644780476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,12,12,128,1,float16,fp8,0,0.0346666673819224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,float16,0,0.02333866556485494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,12,1,128,1,float16,fp8,0,0.02294933299223582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,float16,0,0.025045332809289295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,12,2,128,1,float16,fp8,0,0.02532266577084859
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,float16,0,0.02956266701221466
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,12,4,128,1,float16,fp8,0,0.0315733328461647
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,float16,0,0.021546666820844013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,float16,0,0.01748266691962878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,12,12,128,1,float16,fp8,0,0.023168000082174938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,12,1,128,1,float16,fp8,0,0.01701333373785019
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,float16,0,0.018911999960740406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,12,2,128,1,float16,fp8,0,0.019215999792019527
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,float16,0,0.02096533278624217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,12,4,128,1,float16,fp8,0,0.023269332945346832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,float16,0,0.015114666273196539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,float16,0,0.013327999661366144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,12,12,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,12,1,128,1,float16,fp8,0,0.01309866706530253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,float16,0,0.013167999684810638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,12,2,128,1,float16,fp8,0,0.01328533391157786
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,fp8,0,0.01543466622630755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,12,4,128,1,float16,float16,0,0.015301333119471868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,float16,0,0.013082666943470636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,12,12,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,float16,0,0.012800000607967377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,12,1,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,12,2,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,12,4,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,float16,0,0.012986666212479273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,12,12,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,12,1,128,1,float16,fp8,0,0.013237333546082178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,float16,0,0.012752000242471695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,12,2,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,12,4,128,1,float16,fp8,0,0.013194666554530462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,float16,0,0.012810666114091873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,float16,0,0.012805332740147909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,12,12,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,12,1,128,1,float16,fp8,0,0.013269333789745966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,12,2,128,1,float16,fp8,0,0.013114667187134424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,12,4,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,float16,0,0.09865066409111023
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,fp8,0,0.0792799989382426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,12,2,128,1,float16,fp8,0,0.08941866954167683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,12,1,128,1,float16,float16,0,0.08971200386683147
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,float16,0,0.07828799883524577
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,float16,0,0.14217600226402283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,12,4,128,1,float16,fp8,0,0.12079999844233195
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,12,12,128,1,float16,fp8,0,0.07037333150704701
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,float16,0,0.05147199829419454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,12,1,128,1,float16,fp8,0,0.04574400186538696
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,float16,0,0.05388799806435903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,12,2,128,1,float16,fp8,0,0.05040533343950907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,float16,0,0.06625600159168243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,float16,0,0.03737066686153412
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,12,4,128,1,float16,fp8,0,0.06617600222428639
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,float16,0,0.03150933235883713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,12,12,128,1,float16,fp8,0,0.041120000183582306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,12,1,128,1,float16,fp8,0,0.02903999884923299
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,float16,0,0.03161599983771642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,12,2,128,1,float16,fp8,0,0.031541332602500916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,float16,0,0.03736533224582672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,12,4,128,1,float16,fp8,0,0.03829866647720337
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,float16,0,0.02390933285156886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,12,12,128,1,float16,fp8,0,0.025573333104451496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,float16,0,0.019248000035683315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,12,1,128,1,float16,fp8,0,0.01913600042462349
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,float16,0,0.021066665649414062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,12,2,128,1,float16,fp8,0,0.021136000752449036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,float16,0,0.023333333432674408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,12,4,128,1,float16,fp8,0,0.025285333395004272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,fp8,0,0.018853332847356796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,12,12,128,1,float16,float16,0,0.017125333348910015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,float16,0,0.015168000012636185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,12,1,128,1,float16,fp8,0,0.014896000425020853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,float16,0,0.01526933287580808
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,12,2,128,1,float16,fp8,0,0.01532799998919169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,float16,0,0.017157333592573803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,12,4,128,1,float16,fp8,0,0.017504000415404636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,12,1,128,1,float16,float16,0,0.013002666334311167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,12,12,128,1,float16,fp8,0,0.013194666554530462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,float16,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,12,2,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,float16,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,12,4,128,1,float16,fp8,0,0.013434667140245438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,12,1,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,float16,0,0.01293333371480306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,12,12,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,float16,0,0.012719999998807907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,12,2,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,float16,0,0.01310933381319046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,12,4,128,1,float16,fp8,0,0.013189333180586496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,float16,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,12,1,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,12,12,128,1,float16,float16,0,0.010981333752473196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,float16,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,12,2,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,float16,0,0.011168000598748526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,12,4,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,float16,0,0.012757333616415659
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,12,12,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,12,1,128,1,float16,fp8,0,0.01309866706530253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,float16,0,0.011029332876205444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,12,2,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,float16,0,0.012773333738247553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,12,4,128,1,float16,fp8,0,0.01209066684047381
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,fp8,0,0.07234666744867961
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,12,1,128,1,float16,float16,0,0.08725866675376892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,float16,0,0.0892693301041921
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,12,2,128,1,float16,fp8,0,0.07700799902280171
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,float16,0,0.10205866893132527
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,12,4,128,1,float16,fp8,0,0.09097599983215332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,float16,0,0.0569653312365214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,float16,0,0.0498933345079422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,12,12,128,1,float16,fp8,0,0.053861334919929504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,12,1,128,1,float16,fp8,0,0.04172799984614054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,fp8,0,0.043951998154322304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,float16,0,0.05589866638183594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,12,4,128,1,float16,fp8,0,0.05202133456865946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,12,2,128,1,float16,float16,0,0.05012266834576925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,float16,0,0.029450667401154835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,fp8,0,0.03329066683848699
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,12,12,128,1,float16,float16,0,0.03366400053103765
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,12,1,128,1,float16,fp8,0,0.02552533398071925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,float16,0,0.029674666623274486
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,12,2,128,1,float16,fp8,0,0.028064000109831493
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,float16,0,0.03339733431736628
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,12,4,128,1,float16,fp8,0,0.0316746657093366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,fp8,0,0.021327999730904896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,12,12,128,1,float16,float16,0,0.021205333371957142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,float16,0,0.019904000063737232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,12,1,128,1,float16,fp8,0,0.019306667149066925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,float16,0,0.019530666371186573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,12,2,128,1,float16,fp8,0,0.01915733392039935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,float16,0,0.021712000171343487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,12,4,128,1,float16,fp8,0,0.021087999145189922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,float16,0,0.015205333630243937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,float16,0,0.01533866673707962
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,12,12,128,1,float16,fp8,0,0.014975999792416891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,12,1,128,1,float16,fp8,0,0.01526933287580808
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,float16,0,0.015685333559910457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,12,2,128,1,float16,fp8,0,0.015194666882356008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,float16,0,0.015098666151364645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,float16,0,0.012762666990359625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,12,4,128,1,float16,fp8,0,0.015184000134468079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,fp8,0,0.013130666067202887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,12,12,128,1,float16,float16,0,0.01320533330241839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,12,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,float16,0,0.012917333592971167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,12,2,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,float16,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,12,4,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,float16,0,0.012837332983811697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,12,12,128,1,float16,float16,0,0.011141333729028702
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,12,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,float16,0,0.01221866657336553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,12,2,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,float16,0,0.01109333336353302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,12,4,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,fp8,0,0.012975999464591345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,12,1,128,1,float16,float16,0,0.010901333143313726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,12,12,128,1,float16,float16,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,float16,0,0.012357333054145178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,12,2,128,1,float16,fp8,0,0.011125333607196808
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,fp8,0,0.012538666526476542
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,12,4,128,1,float16,float16,0,0.012821332861979803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,float16,0,0.012479999413092932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,12,1,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,float16,0,0.011765333513418833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,12,12,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,float16,0,0.012800000607967377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,12,2,128,1,float16,fp8,0,0.012159999459981918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,float16,0,0.011168000598748526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,12,4,128,1,float16,fp8,0,0.01119999960064888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,0,0.013104000439246496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,0,0.014853333433469137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,float16,0,0.023333333432674408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,12,12,128,1,float16,fp8,0,0.021216000119845074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,0,0.01907733331123988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,0,0.01727466657757759
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,0,0.01091733326514562
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,0,0.012773333738247553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,0,0.012565333396196365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,0,0.015013333410024643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,0,0.015008000036080679
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,float16,0,0.01703466723362605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,12,12,128,1,float16,fp8,0,0.016762666404247284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,0,0.010869332899649939
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,0,0.010682666053374609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,0,0.010805333654085795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,0,0.011296000331640244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,float16,0,0.01312000056107839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,12,12,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,0,0.009568000212311745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,0,0.010661333799362183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,0,0.009301333377758661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,0,0.010832000523805618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,0,0.010879999647537867
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,0,0.010879999647537867
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,float16,0,0.011237333218256632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,0,0.009402666861812273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,12,12,128,1,float16,fp8,0,0.010954666882753372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,0,0.010751999914646149
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,0,0.010922666639089584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,0,0.009519999846816063
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,0,0.010666667173306147
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,0,0.01091733326514562
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,float16,0,0.01101333275437355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,0,0.010981333752473196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,12,12,128,1,float16,fp8,0,0.010784000158309937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,0,0.010133333504199982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,0,0.010741333166758219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,0,0.008901333436369896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,0,0.009242666885256767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,float16,0,0.010784000158309937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,12,12,128,1,float16,fp8,0,0.011285333583752314
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,0,0.009061333412925402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,0,0.010992000500361124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,0,0.009824000298976898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,0,0.011221333096424738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,float16,0,0.010559999694426855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,0,0.009039999917149544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,12,12,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,0,0.008949333180983862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,0,0.010879999647537867
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,fp8,0,0.010288000106811523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,12,12,128,1,float16,float16,0,0.010293333480755487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,0,0.010735999792814255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,0,0.009056000038981438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,0,0.009162666896979014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,0,0.010746666540702185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,fp8,0,2.7415520350138345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,8,1,128,1,float16,float16,0,3.240250587463379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,float16,0,3.275279998779297
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,8,2,128,1,float16,fp8,0,2.926543871561686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,1.8571893374125164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,float16,0,2.1712427139282227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,float16,0,3.9359518686930337
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,8,4,128,1,float16,fp8,0,3.6519041061401367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,float16,0,1.6645867029825847
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,8,1,128,1,float16,fp8,0,1.5281440416971843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,float16,0,1.7543946901957195
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,8,2,128,1,float16,fp8,0,1.511781374613444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,fp8,0,1.788101355234782
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,8,4,128,1,float16,float16,0,2.1291786829630532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,fp8,0,1.805397351582845
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,8,8,128,1,float16,float16,0,2.20577065149943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,float16,0,1.0734346707661946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,1.054085334142049
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,float16,0,0.9303680260976156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,8,1,128,1,float16,fp8,0,0.7920426527659098
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,float16,0,0.9517172972361246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,8,2,128,1,float16,fp8,0,0.8381546338399252
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,fp8,0,0.9676853020985922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,8,4,128,1,float16,float16,0,1.097434679667155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,fp8,0,0.9801812966664633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,8,8,128,1,float16,float16,0,1.0715359846750896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,0.5638773441314697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,float16,0,0.6112693150838217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,float16,0,0.525434652964274
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,8,1,128,1,float16,fp8,0,0.4626986583073934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,float16,0,0.5463466644287109
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,8,2,128,1,float16,fp8,0,0.4761066834131877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,fp8,0,0.5485813220342001
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,8,4,128,1,float16,float16,0,0.615066647529602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,fp8,0,0.5604053338368734
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,8,8,128,1,float16,float16,0,0.6131093502044678
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,fp8,0,1.6474026044209797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,8,1,128,1,float16,float16,0,1.865514596303304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,float16,0,1.9657333691914876
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,8,2,128,1,float16,fp8,0,1.7483839988708496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,1.1675786972045898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,float16,0,1.2593653202056885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,float16,0,2.429141362508138
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,8,4,128,1,float16,fp8,0,2.7253599166870117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,fp8,0,0.8720160325368246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,8,1,128,1,float16,float16,0,1.0244800249735515
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,float16,0,1.0728960037231445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,8,2,128,1,float16,fp8,0,0.9380906422932943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,float16,0,1.309157371520996
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,8,4,128,1,float16,fp8,0,1.2245279947916667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,float16,0,1.256106694539388
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,float16,0,0.6849760214487711
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,8,8,128,1,float16,fp8,0,1.1589226722717285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,0.6568479935328165
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,fp8,0,0.48465601603190106
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,8,1,128,1,float16,float16,0,0.5672159989674886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,float16,0,0.5890400012334188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,8,2,128,1,float16,fp8,0,0.5194400151570638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,float16,0,0.7081759770711263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,8,4,128,1,float16,fp8,0,0.6423786481221517
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,float16,0,0.698533296585083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,float16,0,0.3994293212890625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,8,8,128,1,float16,fp8,0,0.6363733212153116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,0.3744106690088908
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,fp8,0,0.2974720001220703
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,8,1,128,1,float16,float16,0,0.33372267087300617
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,fp8,0,0.31146132946014404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,8,2,128,1,float16,float16,0,0.33907198905944824
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,float16,0,0.393829345703125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,8,4,128,1,float16,fp8,0,0.3675626516342163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,float16,0,0.3993813196818034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,8,8,128,1,float16,fp8,0,0.3736746708552043
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,float16,0,1.3915360768636067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,8,1,128,1,float16,fp8,0,1.1956799825032551
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,fp8,0,1.2931573390960693
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,8,2,128,1,float16,float16,0,1.4405120213826497
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,0.8731093406677246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,float16,0,0.9333813190460205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,fp8,0,1.6228906313578289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,8,4,128,1,float16,float16,0,1.8153120676676433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,float16,0,0.7405280272165934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,8,1,128,1,float16,fp8,0,0.6449120044708252
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,float16,0,0.7827626864115397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,8,2,128,1,float16,fp8,0,0.698362668355306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,fp8,0,0.8756693204243978
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,8,4,128,1,float16,float16,0,0.9597547054290771
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,float16,0,0.9420320192972819
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,8,8,128,1,float16,fp8,0,0.8725653489430746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,float16,0,0.5258986552556356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,0.4960533380508423
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,float16,0,0.4169706503550212
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,8,1,128,1,float16,fp8,0,0.36341333389282227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,fp8,0,0.38845332463582355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,8,2,128,1,float16,float16,0,0.4413599967956543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,float16,0,0.5444426536560059
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,8,4,128,1,float16,fp8,0,0.48527999718983966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,fp8,0,0.4971413215001424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,8,8,128,1,float16,float16,0,0.5230826536814371
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,float16,0,0.318448007106781
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,0.28109333912531537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,float16,0,0.25442665815353394
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,8,1,128,1,float16,fp8,0,0.22793600956598917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,float16,0,0.2621866663297017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,8,2,128,1,float16,fp8,0,0.24025599161783853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,float16,0,0.29129066069920856
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,8,4,128,1,float16,fp8,0,0.2783946593602498
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,float16,0,0.3091040054957072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,8,8,128,1,float16,fp8,0,0.28146133820215863
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,fp8,0,1.529647986094157
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,8,1,128,1,float16,float16,0,1.7385387420654297
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,fp8,0,1.689359982808431
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,8,2,128,1,float16,float16,0,1.8824106852213542
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,1.3619732856750488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,float16,0,1.2677546342213948
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,fp8,0,2.456810633341471
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,8,4,128,1,float16,float16,0,2.547920068105062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,fp8,0,0.8106613159179688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,8,1,128,1,float16,float16,0,0.9302720228830973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,float16,0,0.9876480102539062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,8,2,128,1,float16,fp8,0,0.8877279758453369
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,fp8,0,1.1363413333892822
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,8,4,128,1,float16,float16,0,1.2906773090362549
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,float16,0,1.2551733652750652
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,8,8,128,1,float16,fp8,0,1.281274636586507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,0.640496015548706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,float16,0,0.6775146325429281
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,fp8,0,0.4425813357035319
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,8,1,128,1,float16,float16,0,0.5118133227030436
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,fp8,0,0.48986132939656574
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,8,2,128,1,float16,float16,0,0.555733323097229
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,fp8,0,0.6232000192006429
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,8,4,128,1,float16,float16,0,0.6825653711954752
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,fp8,0,0.6395359834035238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,8,8,128,1,float16,float16,0,0.6760053634643555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,float16,0,0.3800373474756877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,0.36367468039194745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,fp8,0,0.26216532786687213
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,8,1,128,1,float16,float16,0,0.28911999861399335
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,fp8,0,0.2757973273595174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,8,2,128,1,float16,float16,0,0.30390934149424237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,fp8,0,0.35737065474192303
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,8,4,128,1,float16,float16,0,0.3954133192698161
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,fp8,0,0.36345068613688153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,8,8,128,1,float16,float16,0,0.37987732887268066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,float16,0,0.18360533316930136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,0.21080533663431802
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,float16,0,0.22611733277638754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,8,1,128,1,float16,fp8,0,0.1658506691455841
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,float16,0,0.1898826758066813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,8,2,128,1,float16,fp8,0,0.1759200096130371
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,float16,0,0.21101866165796915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,8,4,128,1,float16,fp8,0,0.20746133724848428
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,float16,0,0.22616000970204672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,8,8,128,1,float16,fp8,0,0.21079999208450317
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,float16,0,1.0588746865590413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,8,1,128,1,float16,fp8,0,0.9341440200805664
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,float16,0,1.1593066851298015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,8,2,128,1,float16,fp8,0,1.0575573444366455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,fp8,0,1.4201226234436035
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,8,4,128,1,float16,float16,0,1.6512266794840496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,float16,0,0.8245866298675537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,float16,0,0.5776106516520182
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,0.7805919647216797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,8,1,128,1,float16,fp8,0,0.5038400093714396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,float16,0,0.6314613421758016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,8,2,128,1,float16,fp8,0,0.5673653284708658
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,float16,0,0.8793386618296305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,8,4,128,1,float16,fp8,0,0.8755786418914795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,float16,0,0.8087999820709229
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,float16,0,0.45631468296051025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,0.45053335030873615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,8,8,128,1,float16,fp8,0,0.7889440059661865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,float16,0,0.3216639955838521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,8,1,128,1,float16,fp8,0,0.27822933594385785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,float16,0,0.3460426727930705
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,8,2,128,1,float16,fp8,0,0.31302400430043537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,float16,0,0.4561013380686442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,8,4,128,1,float16,fp8,0,0.42660800615946454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,float16,0,0.45848532517751056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,0.24733867247899374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,float16,0,0.25552000602086383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,8,8,128,1,float16,fp8,0,0.4749813477198283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,float16,0,0.18502932786941528
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,8,1,128,1,float16,fp8,0,0.1685546636581421
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,float16,0,0.19764800866444907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,8,2,128,1,float16,fp8,0,0.18361600240071616
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,float16,0,0.2502453327178955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,8,4,128,1,float16,fp8,0,0.2299733360608419
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,float16,0,0.2563626567522685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,8,8,128,1,float16,fp8,0,0.24842133124669394
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,0.1402346690495809
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,float16,0,0.14242666959762573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,float16,0,0.11740266283353169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,8,1,128,1,float16,fp8,0,0.10914666453997295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,float16,0,0.12340266505877177
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,8,2,128,1,float16,fp8,0,0.11545067032178243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,float16,0,0.13920000195503235
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,8,4,128,1,float16,fp8,0,0.1401653289794922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,fp8,0,0.14008000493049622
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,8,8,128,1,float16,float16,0,0.14450132846832275
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,float16,0,1.0218026638031006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,8,1,128,1,float16,fp8,0,0.9214666684468588
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,fp8,0,1.076581319173177
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,8,2,128,1,float16,float16,0,1.17739733060201
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,0.855685313542684
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,float16,0,0.8736586570739746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,float16,0,1.8333759307861328
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,8,4,128,1,float16,fp8,0,1.8502559661865234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,float16,0,0.5511840184529623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,8,1,128,1,float16,fp8,0,0.48309866587320965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,float16,0,0.6098933219909668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,8,2,128,1,float16,fp8,0,0.5674986839294434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,float16,0,0.9011572996775309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,8,4,128,1,float16,fp8,0,0.8266399701436361
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,float16,0,0.8682879606882731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,8,8,128,1,float16,fp8,0,0.8760586579640707
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,0.4612106482187907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,float16,0,0.4678719838460286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,float16,0,0.2985386649767558
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,8,1,128,1,float16,fp8,0,0.26894932985305786
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,float16,0,0.3407680193583171
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,8,2,128,1,float16,fp8,0,0.3137493332227071
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,float16,0,0.4700640042622884
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,8,4,128,1,float16,fp8,0,0.46293866634368896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,float16,0,0.46001601219177246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,8,8,128,1,float16,fp8,0,0.4620533386866252
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,float16,0,0.2589919964472453
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,0.26044267416000366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,float16,0,0.16709333658218384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,8,1,128,1,float16,fp8,0,0.15262933572133383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,float16,0,0.18774400154749551
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,8,2,128,1,float16,fp8,0,0.16738667090733847
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,float16,0,0.27347733577092487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,8,4,128,1,float16,fp8,0,0.25271467367808026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,float16,0,0.25969600677490234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,8,8,128,1,float16,fp8,0,0.26021866003672284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.13940266768137613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,float16,0,0.10431999961535136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,float16,0,0.14762133359909058
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,8,1,128,1,float16,fp8,0,0.09846933682759602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,float16,0,0.11284266908963521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,8,2,128,1,float16,fp8,0,0.10686399539311726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,float16,0,0.13553067048390707
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,8,4,128,1,float16,fp8,0,0.1402400036652883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,fp8,0,0.1413333316644033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,8,8,128,1,float16,float16,0,0.14979733030001322
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,float16,0,0.08061866462230682
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.08453866839408875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,float16,0,0.06437333424886067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,8,1,128,1,float16,fp8,0,0.06259733438491821
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,float16,0,0.06902400155862172
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,8,2,128,1,float16,fp8,0,0.06706133484840393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,float16,0,0.07933866480986278
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,8,4,128,1,float16,fp8,0,0.08281066517035167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,float16,0,0.07956799864768982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,8,8,128,1,float16,fp8,0,0.0845973292986552
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,float16,0,0.6520373423894247
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,8,1,128,1,float16,fp8,0,0.5819520155588785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,float16,0,0.7449759642283121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,8,2,128,1,float16,fp8,0,0.7097547054290771
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,float16,0,1.1749760309855144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,8,4,128,1,float16,fp8,0,1.161295970280965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,float16,0,0.6090666850407919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,float16,0,0.35207466284434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,0.5905119975407919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,8,1,128,1,float16,fp8,0,0.3159839908281962
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,float16,0,0.40034667650858563
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,8,2,128,1,float16,fp8,0,0.37745598951975506
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,float16,0,0.6180479923884074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,8,4,128,1,float16,fp8,0,0.6772212982177734
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,float16,0,0.5894666512807211
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,0.34381333986918133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,float16,0,0.3322880069414775
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,8,8,128,1,float16,fp8,0,0.6265279849370321
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,float16,0,0.19196800390879312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,8,1,128,1,float16,fp8,0,0.16826132933298746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,float16,0,0.21964800357818604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,8,2,128,1,float16,fp8,0,0.20619199673334757
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,float16,0,0.3294559915860494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,8,4,128,1,float16,fp8,0,0.3187946677207947
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,float16,0,0.3305973410606384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,float16,0,0.18225600322087607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,8,8,128,1,float16,fp8,0,0.3441173235575358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,0.1814240018526713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,float16,0,0.10977600018183391
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,8,1,128,1,float16,fp8,0,0.10125866532325745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,float16,0,0.11955733100573222
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,8,2,128,1,float16,fp8,0,0.11572266618410747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,float16,0,0.17893866697947183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,8,4,128,1,float16,fp8,0,0.1728480060895284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,fp8,0,0.1825653314590454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,8,8,128,1,float16,float16,0,0.18279467026392618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,float16,0,0.09306666254997253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.09894399841626485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,float16,0,0.07083199918270111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,8,1,128,1,float16,fp8,0,0.06645866731802623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,float16,0,0.07504533231258392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,8,2,128,1,float16,fp8,0,0.07275199890136719
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,float16,0,0.09160000085830688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,8,4,128,1,float16,fp8,0,0.09662933150927226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,float16,0,0.09343467156092326
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,8,8,128,1,float16,fp8,0,0.0986293355623881
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,float16,0,0.06241066753864288
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,float16,0,0.05186133086681366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.06646933158238728
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,8,1,128,1,float16,fp8,0,0.04985600213209788
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,float16,0,0.05429333448410034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,8,2,128,1,float16,fp8,0,0.053930665055910744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,float16,0,0.06155733267466227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,8,4,128,1,float16,fp8,0,0.06477333108584087
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,fp8,0,0.06611200173695882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,8,8,128,1,float16,float16,0,0.06268266836802165
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,fp8,0,0.6050666570663452
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,8,1,128,1,float16,float16,0,0.6696746349334717
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,float16,0,0.805344025293986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,8,2,128,1,float16,fp8,0,0.7656640211741129
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,float16,0,1.4725440343221028
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,float16,0,0.7304320335388184
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,0.691706657409668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,8,4,128,1,float16,fp8,0,1.5276479721069336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,float16,0,0.3502613306045532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,8,1,128,1,float16,fp8,0,0.32177066802978516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,float16,0,0.4240959882736206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,8,2,128,1,float16,fp8,0,0.40484801928202313
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,float16,0,0.7004480361938477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,8,4,128,1,float16,fp8,0,0.7923573652903239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,float16,0,0.6839199860890707
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,8,8,128,1,float16,fp8,0,0.6925866603851318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,float16,0,0.39898133277893066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,0.41788268089294434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,float16,0,0.19209067026774088
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,8,1,128,1,float16,fp8,0,0.17483200629552206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,float16,0,0.2312800089518229
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,8,2,128,1,float16,fp8,0,0.21715199947357178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,float16,0,0.36531198024749756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,8,4,128,1,float16,fp8,0,0.35177067915598553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,float16,0,0.39085865020751953
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,float16,0,0.19663999478022257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,8,8,128,1,float16,fp8,0,0.38681598504384357
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,0.2039360006650289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,float16,0,0.1014453371365865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,8,1,128,1,float16,fp8,0,0.09931199749310811
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,float16,0,0.12307733297348022
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,8,2,128,1,float16,fp8,0,0.11161599556605022
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,float16,0,0.20972800254821777
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,8,4,128,1,float16,fp8,0,0.18417600790659586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,float16,0,0.1967946688334147
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,8,8,128,1,float16,fp8,0,0.20365333557128906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,float16,0,0.1127839982509613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,float16,0,0.06488533318042755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.10710400342941284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,8,1,128,1,float16,fp8,0,0.06226666768391927
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,float16,0,0.07238933444023132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,8,2,128,1,float16,fp8,0,0.07257600128650665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,float16,0,0.0942133367061615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,8,4,128,1,float16,fp8,0,0.10358933607737224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,float16,0,0.11451733112335205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,8,8,128,1,float16,fp8,0,0.10351999600728352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,float16,0,0.055546666185061135
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,float16,0,0.03979199876387914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.06366399923960368
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,8,1,128,1,float16,fp8,0,0.039674667020638786
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,float16,0,0.04394666850566864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,8,2,128,1,float16,fp8,0,0.04558399816354116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,float16,0,0.05411200225353241
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,8,4,128,1,float16,fp8,0,0.06006933252016703
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,float16,0,0.056143999099731445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,8,8,128,1,float16,fp8,0,0.060506666700045265
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,float16,0,0.044405331214269005
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.047770669062932335
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,float16,0,0.03761066744724909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,8,1,128,1,float16,fp8,0,0.03753600021203359
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,float16,0,0.03956266740957896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,8,2,128,1,float16,fp8,0,0.03878399978081385
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,float16,0,0.04377600053946177
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,8,4,128,1,float16,fp8,0,0.04809066653251648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,fp8,0,0.04786666731039683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,8,8,128,1,float16,float16,0,0.044106667240460716
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,float16,0,0.4396746555964152
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,8,1,128,1,float16,fp8,0,0.4002559979756673
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,float16,0,0.5336159865061442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,8,2,128,1,float16,fp8,0,0.5199573437372843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,fp8,0,0.8909333546956381
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,float16,0,0.48634131749471027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,8,4,128,1,float16,float16,0,1.0036533673604329
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,float16,0,0.23458667596181235
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,0.5049066543579102
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,8,1,128,1,float16,fp8,0,0.21756800015767416
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,float16,0,0.2839306592941284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,8,2,128,1,float16,fp8,0,0.28143467505772907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,float16,0,0.5385973453521729
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,8,4,128,1,float16,fp8,0,0.5840053160985311
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,float16,0,0.25890133778254193
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,float16,0,0.4868533213933309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,8,8,128,1,float16,fp8,0,0.5515146652857462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,0.2871786753336589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,float16,0,0.13049599528312683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,8,1,128,1,float16,fp8,0,0.11204800009727478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,fp8,0,0.15179733435312906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,8,2,128,1,float16,float16,0,0.16075199842453003
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,float16,0,0.2738399902979533
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,8,4,128,1,float16,fp8,0,0.2640213370323181
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,fp8,0,0.2807360092798869
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,8,8,128,1,float16,float16,0,0.26794666051864624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,float16,0,0.14191466569900513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.14909332990646362
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,float16,0,0.0710506687561671
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,8,1,128,1,float16,fp8,0,0.06850133339564006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,float16,0,0.08131733536720276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,8,2,128,1,float16,fp8,0,0.08407466610272725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,float16,0,0.1356000006198883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,8,4,128,1,float16,fp8,0,0.1309386690457662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,float16,0,0.1418293317159017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,8,8,128,1,float16,fp8,0,0.15134933590888977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.07690666615962982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,float16,0,0.06891199946403503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,float16,0,0.04752000172932943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,8,1,128,1,float16,fp8,0,0.045824001232783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,float16,0,0.051882664362589516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,8,2,128,1,float16,fp8,0,0.053898667295773826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,fp8,0,0.07641066610813141
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,8,4,128,1,float16,float16,0,0.06859733164310455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,fp8,0,0.07863999903202057
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,8,8,128,1,float16,float16,0,0.06980800131956737
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.04993066688378652
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,float16,0,0.0440533310174942
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,float16,0,0.033439998825391136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,8,1,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,float16,0,0.035674666364987694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,8,2,128,1,float16,fp8,0,0.0377866675456365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,float16,0,0.04388799766699473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,8,4,128,1,float16,fp8,0,0.047824000318845115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,float16,0,0.04391466577847799
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,8,8,128,1,float16,fp8,0,0.04972266654173533
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,float16,0,0.035536001125971474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.038805333276589714
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,float16,0,0.031258667508761086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,8,1,128,1,float16,fp8,0,0.03162666658560435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,float16,0,0.03146666785081228
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,8,2,128,1,float16,fp8,0,0.0334346666932106
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,float16,0,0.0366293340921402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,8,4,128,1,float16,fp8,0,0.03950933367013931
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,float16,0,0.035775999228159584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,8,8,128,1,float16,fp8,0,0.03958933303753535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,float16,0,0.47315200169881183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,8,1,128,1,float16,fp8,0,0.4456799825032552
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,float16,0,0.6107093493143717
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,8,2,128,1,float16,fp8,0,0.6060373385747274
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,float16,0,0.5818186601003011
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,float16,0,1.1676853497823079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,8,4,128,1,float16,fp8,0,1.3602026303609211
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,0.6032640139261881
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,float16,0,0.2510293324788411
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,8,1,128,1,float16,fp8,0,0.23511467377344766
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,float16,0,0.3262773354848226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,8,2,128,1,float16,fp8,0,0.3202773332595825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,float16,0,0.658133347829183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,8,4,128,1,float16,fp8,0,0.578437328338623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,float16,0,0.6208000183105469
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,float16,0,0.3423893451690674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,8,8,128,1,float16,fp8,0,0.7002933025360107
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,0.3220213254292806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,float16,0,0.13677866260210672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,8,1,128,1,float16,fp8,0,0.12595733006795248
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,float16,0,0.17031999429066977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,8,2,128,1,float16,fp8,0,0.16804800430933634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,float16,0,0.315226674079895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,8,4,128,1,float16,fp8,0,0.3043786684672038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,float16,0,0.30188800891240436
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,8,8,128,1,float16,fp8,0,0.3221493363380432
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,float16,0,0.16634133458137512
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,float16,0,0.07135466734568278
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.17218132813771567
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,8,1,128,1,float16,fp8,0,0.07032533486684163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,float16,0,0.09147733449935913
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,8,2,128,1,float16,fp8,0,0.08760533730189006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,float16,0,0.17653866608937582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,8,4,128,1,float16,fp8,0,0.16134400169054666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,float16,0,0.16680532693862915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,8,8,128,1,float16,fp8,0,0.17698667446772257
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.08614400029182434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,float16,0,0.09158933162689209
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,float16,0,0.04600533346335093
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,8,1,128,1,float16,fp8,0,0.04563199977080027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,float16,0,0.052517334620157875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,8,2,128,1,float16,fp8,0,0.05409066875775655
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,float16,0,0.07509333391984303
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,8,4,128,1,float16,fp8,0,0.08619733651479085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,fp8,0,0.08709866801897685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,8,8,128,1,float16,float16,0,0.08709333340326945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.05007466673851013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,float16,0,0.043935999274253845
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,float16,0,0.027429332335789997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,8,1,128,1,float16,fp8,0,0.029194665451844532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,float16,0,0.0330826664964358
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,8,2,128,1,float16,fp8,0,0.03369600077470144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,float16,0,0.04232533276081085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,8,4,128,1,float16,fp8,0,0.04810666541258494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,fp8,0,0.05009600023428599
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,8,8,128,1,float16,float16,0,0.044112001856168113
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,float16,0,0.03324266771475474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.0358240008354187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,float16,0,0.025514667232831318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,8,1,128,1,float16,fp8,0,0.02584533393383026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,float16,0,0.027450665831565857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,8,2,128,1,float16,fp8,0,0.029557332396507263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,float16,0,0.03342399994532267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,8,4,128,1,float16,fp8,0,0.035743998984495796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,float16,0,0.03329066683848699
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,8,8,128,1,float16,fp8,0,0.035461333890755974
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,float16,0,0.02861333390076955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.0295413335164388
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,float16,0,0.02367466688156128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,8,1,128,1,float16,fp8,0,0.023242667317390442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,float16,0,0.025578667720158894
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,8,2,128,1,float16,fp8,0,0.025578667720158894
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,float16,0,0.027471999327341717
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,8,4,128,1,float16,fp8,0,0.029311999678611755
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,float16,0,0.02890666574239731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,8,8,128,1,float16,fp8,0,0.029717333614826202
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,float16,0,0.388922651608785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,8,1,128,1,float16,fp8,0,0.36186134815216064
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,float16,0,0.5278613169987997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,8,2,128,1,float16,fp8,0,0.5252480109532675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,float16,0,0.611573338508606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,0.6632320086161295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,float16,0,1.068943977355957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,8,4,128,1,float16,fp8,0,1.2787840366363525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,float16,0,0.20836800336837769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,8,1,128,1,float16,fp8,0,0.19008000691731772
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,float16,0,0.2730506658554077
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,8,2,128,1,float16,fp8,0,0.27327466011047363
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,float16,0,0.5648106733957926
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,8,4,128,1,float16,fp8,0,0.5350613196690878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,float16,0,0.3169013261795044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.3446933428446452
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,float16,0,0.5299946864446005
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,8,8,128,1,float16,fp8,0,0.6600373188654581
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,float16,0,0.11163199941317241
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,8,1,128,1,float16,fp8,0,0.10326932867368062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,float16,0,0.151119997104009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,8,2,128,1,float16,fp8,0,0.14428266882896423
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,float16,0,0.28434133529663086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,8,4,128,1,float16,fp8,0,0.29978134234746295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.166485329469045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,float16,0,0.14524267117182413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,float16,0,0.2934986750284831
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,8,8,128,1,float16,fp8,0,0.3468799988428752
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,float16,0,0.058005332946777344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,8,1,128,1,float16,fp8,0,0.05625600119431814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,float16,0,0.07574399809042613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,8,2,128,1,float16,fp8,0,0.07032533486684163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,float16,0,0.16382933656374613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,8,4,128,1,float16,fp8,0,0.16164799531300864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,float16,0,0.1535040040810903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,float16,0,0.08057066798210144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,8,8,128,1,float16,fp8,0,0.15771200259526572
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.07881066699822743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,float16,0,0.035530666510264076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,8,1,128,1,float16,fp8,0,0.03547733277082443
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,float16,0,0.042581334710121155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,8,2,128,1,float16,fp8,0,0.04550399879614512
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,float16,0,0.06451733410358429
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,8,4,128,1,float16,fp8,0,0.07685866455237071
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,fp8,0,0.07872533301512401
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,8,8,128,1,float16,float16,0,0.08095466593901317
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,float16,0,0.03750933210055033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,float16,0,0.023306667804718018
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.04571733375390371
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,8,1,128,1,float16,fp8,0,0.023498666783173878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,float16,0,0.025562666356563568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,8,2,128,1,float16,fp8,0,0.029631999631722767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,float16,0,0.037690666814645134
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,8,4,128,1,float16,fp8,0,0.0436160018046697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,float16,0,0.03773866593837738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,8,8,128,1,float16,fp8,0,0.04565866788228353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,float16,0,0.027376001079877216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,fp8,0,0.021168000996112823
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.03151999910672506
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,8,1,128,1,float16,float16,0,0.02056533346573512
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,float16,0,0.021290667355060577
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,8,2,128,1,float16,fp8,0,0.023423999547958374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,float16,0,0.02739199995994568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,8,4,128,1,float16,fp8,0,0.031178665657838184
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,float16,0,0.027621333797772724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,8,8,128,1,float16,fp8,0,0.03148799886306127
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,float16,0,0.0230880007147789
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.025248001019159954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,float16,0,0.01913600042462349
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,fp8,0,0.018911999960740406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,8,1,128,1,float16,float16,0,0.018981333822011948
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,8,2,128,1,float16,fp8,0,0.021007999777793884
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,float16,0,0.02149333308140437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,8,4,128,1,float16,fp8,0,0.023455999791622162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,float16,0,0.02319466571013133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,8,8,128,1,float16,fp8,0,0.025066666305065155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.020981334149837494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,float16,0,0.019109333554903667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,float16,0,0.019152000546455383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,8,1,128,1,float16,fp8,0,0.019093333433071773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,float16,0,0.018768000106016796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,8,2,128,1,float16,fp8,0,0.017344000438849132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,fp8,0,0.020975999534130096
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,8,4,128,1,float16,float16,0,0.01940800001223882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,fp8,0,0.0200853335360686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,8,8,128,1,float16,float16,0,0.019333332777023315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,fp8,0,0.16645866632461548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,8,1,128,1,float16,float16,0,0.17294400930404663
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,float16,0,0.24195200204849243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,8,2,128,1,float16,fp8,0,0.24470933278401694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,float16,0,0.29903467496236164
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.275706668694814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,float16,0,0.542143980662028
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,8,4,128,1,float16,fp8,0,0.5637120008468628
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,float16,0,0.09004799524943034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,8,1,128,1,float16,fp8,0,0.08815466364224751
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,float16,0,0.12389866511027019
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,8,2,128,1,float16,fp8,0,0.1357919971148173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,float16,0,0.2638133366902669
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,8,4,128,1,float16,fp8,0,0.28119466702143353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,float16,0,0.144186665614446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.15897066394488016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,float16,0,0.29691733916600543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,8,8,128,1,float16,fp8,0,0.28331200281778973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,float16,0,0.04773866633574168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,8,1,128,1,float16,fp8,0,0.049728001157442726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,float16,0,0.06440000236034393
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,8,2,128,1,float16,fp8,0,0.06381333371003468
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,float16,0,0.1539306640625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,8,4,128,1,float16,fp8,0,0.15028799573580423
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,float16,0,0.14512532949447632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,float16,0,0.07579199969768524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,8,8,128,1,float16,fp8,0,0.15005333224932352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.07458133498827617
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,fp8,0,0.03123733401298523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,fp8,0,0.040149333576361336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,8,1,128,1,float16,float16,0,0.029167999823888142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,8,2,128,1,float16,float16,0,0.03628266602754593
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,float16,0,0.05793066819508871
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,8,4,128,1,float16,fp8,0,0.07040533423423767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,fp8,0,0.07240533332029979
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,8,8,128,1,float16,float16,0,0.07426133255163829
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.04188266893227895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,float16,0,0.019066666563351948
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,float16,0,0.03346666693687439
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,8,1,128,1,float16,fp8,0,0.021125334004561108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,float16,0,0.023157333334287006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,8,2,128,1,float16,fp8,0,0.02554133286078771
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,float16,0,0.03342399994532267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,8,4,128,1,float16,fp8,0,0.03976000100374222
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,fp8,0,0.04190400242805481
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,8,8,128,1,float16,float16,0,0.0337119996547699
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,float16,0,0.025493333737055462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,float16,0,0.017152000218629837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.02738133321205775
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,float16,0,0.019013332823912304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,8,1,128,1,float16,fp8,0,0.019248000035683315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,8,2,128,1,float16,fp8,0,0.019130667050679524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,float16,0,0.025146665672461193
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,8,4,128,1,float16,fp8,0,0.02756800005833308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,float16,0,0.02531733363866806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,8,8,128,1,float16,fp8,0,0.029157333076000214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,float16,0,0.019530666371186573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.021151999632517498
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,float16,0,0.015856000284353893
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,8,1,128,1,float16,fp8,0,0.01609066625436147
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,float16,0,0.017157333592573803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,8,2,128,1,float16,fp8,0,0.01728533332546552
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,float16,0,0.019365333020687103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,8,4,128,1,float16,fp8,0,0.02117866774400075
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,float16,0,0.019413333386182785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,8,8,128,1,float16,fp8,0,0.021370666722456615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,float16,0,0.017125333348910015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,float16,0,0.015290666371583939
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,8,1,128,1,float16,fp8,0,0.015114666273196539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,float16,0,0.015226667126019796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,8,2,128,1,float16,fp8,0,0.015370666980743408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,float16,0,0.01710933322707812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,8,4,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,fp8,0,0.017018667111794155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,8,8,128,1,float16,float16,0,0.01758933315674464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,float16,0,0.015397333850463232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,float16,0,0.015397333850463232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.015050667027632395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,8,1,128,1,float16,fp8,0,0.015295999745527903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,fp8,0,0.015194666882356008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,float16,0,0.015018666783968607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,8,4,128,1,float16,fp8,0,0.015226667126019796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,8,2,128,1,float16,float16,0,0.015925332903862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,fp8,0,0.015013333410024643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,8,8,128,1,float16,float16,0,0.015210667004187902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,float16,0,0.10221866766611735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,float16,0,0.13899733622868857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,8,2,128,1,float16,fp8,0,0.14573333660761514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,8,1,128,1,float16,fp8,0,0.09834667046864827
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,float16,0,0.15036267042160034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,float16,0,0.29629866282145184
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,8,4,128,1,float16,fp8,0,0.26789865891138714
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.1525920033454895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,float16,0,0.05201066533724467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,8,1,128,1,float16,fp8,0,0.05293866495291392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,float16,0,0.07212799787521362
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,8,2,128,1,float16,fp8,0,0.07049066821734111
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,float16,0,0.1434346636136373
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,8,4,128,1,float16,fp8,0,0.14481066664059958
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,float16,0,0.07867200175921123
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,float16,0,0.15705600380897522
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.0732586681842804
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,8,8,128,1,float16,fp8,0,0.15962666273117065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,float16,0,0.03328000009059906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,8,1,128,1,float16,fp8,0,0.033471999069054924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,float16,0,0.03947199881076813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,8,2,128,1,float16,fp8,0,0.04179200033346812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,float16,0,0.06198933223883311
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,8,4,128,1,float16,fp8,0,0.07418666779994965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,float16,0,0.07461866736412048
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.0435146689414978
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,float16,0,0.03540800015131632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,8,8,128,1,float16,fp8,0,0.07644799848397572
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,float16,0,0.021168000996112823
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,8,1,128,1,float16,fp8,0,0.021013334393501282
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,float16,0,0.024186665813128155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,8,2,128,1,float16,fp8,0,0.025455998877684276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,float16,0,0.035301332672437034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,8,4,128,1,float16,fp8,0,0.041434665520985924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,float16,0,0.0355679988861084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,float16,0,0.023141334454218548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,8,8,128,1,float16,fp8,0,0.04174399872620901
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.025397333006064098
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,fp8,0,0.01525866612792015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,8,1,128,1,float16,float16,0,0.015263999501864115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,float16,0,0.01716800034046173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,8,2,128,1,float16,fp8,0,0.017279999951521557
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,float16,0,0.023402666052182514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,8,4,128,1,float16,fp8,0,0.025536000728607178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,float16,0,0.021456000705560047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,8,8,128,1,float16,fp8,0,0.025402667621771496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,float16,0,0.017530667285124462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.01953599974513054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,8,1,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,float16,0,0.014981333166360855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,8,2,128,1,float16,fp8,0,0.015189333508412043
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,float16,0,0.01716800034046173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,8,4,128,1,float16,fp8,0,0.01922133316596349
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,float16,0,0.017573333034912746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,8,8,128,1,float16,fp8,0,0.01934933289885521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,float16,0,0.015002666662136713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,float16,0,0.013514666507641474
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,8,1,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,8,2,128,1,float16,fp8,0,0.013125333935022354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,float16,0,0.01505600040157636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,8,4,128,1,float16,fp8,0,0.015439999600251516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,float16,0,0.015200000256299973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,8,8,128,1,float16,fp8,0,0.015210667004187902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.013199999928474426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,float16,0,0.012858666479587555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,8,1,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,float16,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,8,2,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,fp8,0,0.01309866706530253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,8,4,128,1,float16,float16,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,float16,0,0.01312000056107839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,8,8,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,float16,0,0.012805332740147909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,8,1,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,float16,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,8,2,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,float16,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,8,4,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,float16,0,0.012842666357755661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,8,8,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,float16,0,0.07669333120187123
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,8,1,128,1,float16,fp8,0,0.0699786643187205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,float16,0,0.09504533807436626
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,float16,0,0.08321600159009297
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,float16,0,0.18036266167958578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,8,4,128,1,float16,fp8,0,0.15011733770370483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,8,2,128,1,float16,fp8,0,0.0858133335908254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.08221866687138875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,float16,0,0.043840001026789345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,8,1,128,1,float16,fp8,0,0.04093866546948751
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,float16,0,0.049866666396458946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,8,2,128,1,float16,fp8,0,0.05201066533724467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,float16,0,0.07255466779073079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,8,4,128,1,float16,fp8,0,0.08065600196520488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.04751466711362203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,float16,0,0.041696002086003624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,fp8,0,0.08494399984677632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,8,8,128,1,float16,float16,0,0.08278400202592213
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,float16,0,0.02604266752799352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,8,1,128,1,float16,fp8,0,0.02550933261712392
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,float16,0,0.030997333427270252
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,8,2,128,1,float16,fp8,0,0.031285333136717476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,float16,0,0.039818666875362396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,8,4,128,1,float16,fp8,0,0.04598933458328247
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,float16,0,0.041759997606277466
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.02926933268706004
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,float16,0,0.02521066615978877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,8,8,128,1,float16,fp8,0,0.04829333225886027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,float16,0,0.019280000279347103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,8,1,128,1,float16,fp8,0,0.01877333347996076
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,float16,0,0.01899733394384384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,8,2,128,1,float16,fp8,0,0.02092266579469045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,float16,0,0.025429333249727886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,8,4,128,1,float16,fp8,0,0.030080000559488933
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,float16,0,0.025461333493391674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,8,8,128,1,float16,fp8,0,0.02922133356332779
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,float16,0,0.017077332983414333
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,8,1,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,fp8,0,0.015178666760524115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,8,2,128,1,float16,float16,0,0.014858666807413101
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,float16,0,0.01710933322707812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,8,4,128,1,float16,fp8,0,0.019152000546455383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,float16,0,0.01728533332546552
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,8,8,128,1,float16,fp8,0,0.019152000546455383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.015210667004187902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,float16,0,0.012784000486135483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,float16,0,0.014362666755914688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,8,1,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,float16,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,8,2,128,1,float16,fp8,0,0.013221333424250284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,float16,0,0.013306666165590286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,8,4,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,fp8,0,0.01533866673707962
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,8,8,128,1,float16,float16,0,0.015061333775520325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,float16,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.013290667285521826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,float16,0,0.012773333738247553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,8,1,128,1,float16,fp8,0,0.013034666577974955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,float16,0,0.012981332838535309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,8,2,128,1,float16,fp8,0,0.01328533391157786
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,float16,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,8,4,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,8,8,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,float16,0,0.012768000364303589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,float16,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,8,1,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,float16,0,0.012821332861979803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,8,2,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,float16,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,8,4,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,float16,0,0.013072000195582708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,8,8,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,float16,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,float16,0,0.011237333218256632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,8,1,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,float16,0,0.011231999844312668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,float16,0,0.012954667210578918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,8,2,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,8,4,128,1,float16,fp8,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,fp8,0,0.012682666381200155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,8,8,128,1,float16,float16,0,0.013141332815090815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,float16,0,0.06683200101057689
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,8,1,128,1,float16,fp8,0,0.058042665322621666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,float16,0,0.07392533123493195
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,8,2,128,1,float16,fp8,0,0.06778666873772939
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,float16,0,0.05324266850948334
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,fp8,0,0.09985066453615825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,8,4,128,1,float16,float16,0,0.10477866729100545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.05602133274078369
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,fp8,0,0.035391998787721
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,float16,0,0.04154133299986521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,8,1,128,1,float16,float16,0,0.03788266579310099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,8,2,128,1,float16,fp8,0,0.03956799954175949
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,float16,0,0.052015999952952065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,8,4,128,1,float16,fp8,0,0.05443733433882395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,float16,0,0.05266666909058889
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,float16,0,0.031498665610949196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.0336053321758906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,8,8,128,1,float16,fp8,0,0.055829331278800964
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,fp8,0,0.023472001155217487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,float16,0,0.026672000686327618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,8,1,128,1,float16,float16,0,0.024501333634058636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,8,2,128,1,float16,fp8,0,0.02521066615978877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,float16,0,0.03141866624355316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,8,4,128,1,float16,fp8,0,0.03365333378314972
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,fp8,0,0.03342933456103007
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,8,8,128,1,float16,float16,0,0.03160533308982849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,float16,0,0.021226666867733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.021717332303524017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,float16,0,0.01717866708834966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,8,1,128,1,float16,fp8,0,0.015178666760524115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,float16,0,0.018613333503405254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,8,2,128,1,float16,fp8,0,0.01876266673207283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,float16,0,0.02102400114138921
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,8,4,128,1,float16,fp8,0,0.021349333226680756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,float16,0,0.021322667598724365
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,8,8,128,1,float16,fp8,0,0.02179733415444692
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,float16,0,0.014837333311637243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.015354666858911514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,float16,0,0.013194666554530462
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,8,1,128,1,float16,fp8,0,0.013130666067202887
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,8,2,128,1,float16,float16,0,0.012773333738247553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,float16,0,0.01492799942692121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,8,4,128,1,float16,fp8,0,0.015184000134468079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,fp8,0,0.015295999745527903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,8,8,128,1,float16,float16,0,0.014896000425020853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,float16,0,0.012608000387748083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.012703999876976013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,float16,0,0.011370666325092316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,8,1,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,float16,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,8,2,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,fp8,0,0.01312000056107839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,8,4,128,1,float16,float16,0,0.013114667187134424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,float16,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,8,8,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,float16,0,0.010938666760921478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,float16,0,0.011722666521867117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,8,1,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,float16,0,0.010901333143313726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,8,2,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,float16,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,8,4,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,fp8,0,0.011253333340088526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,8,8,128,1,float16,float16,0,0.011306667079528173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,float16,0,0.012826666235923767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,float16,0,0.011205332974592844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,8,1,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,float16,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,8,2,128,1,float16,fp8,0,0.012223999947309494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,float16,0,0.01098666712641716
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,8,4,128,1,float16,fp8,0,0.012709333250919977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,float16,0,0.012655999511480331
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,8,8,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,float16,0,0.010954666882753372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,float16,0,0.012159999459981918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,8,1,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,float16,0,0.01090666651725769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,8,2,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,float16,0,0.012693333129088083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,8,4,128,1,float16,fp8,0,0.013002666334311167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,float16,0,0.011407999942700068
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,8,8,128,1,float16,fp8,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,float16,0,0.06278400123119354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,8,1,128,1,float16,fp8,0,0.05190933247407278
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,float16,0,0.06649066507816315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,8,2,128,1,float16,fp8,0,0.05817066629727682
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,float16,0,0.07589866717656453
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,8,4,128,1,float16,fp8,0,0.07247999807198842
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.04172799984614054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,float16,0,0.043663998444875084
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,fp8,0,0.032261334359645844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,8,1,128,1,float16,float16,0,0.036346666514873505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,float16,0,0.037632000943024956
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,8,2,128,1,float16,fp8,0,0.03403199960788091
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,float16,0,0.043951998154322304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,float16,0,0.02722666660944621
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,8,4,128,1,float16,fp8,0,0.04196799794832865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,float16,0,0.043680002291997276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,8,8,128,1,float16,fp8,0,0.04355733096599579
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.02757333219051361
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,fp8,0,0.02124800036350886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,float16,0,0.025008000433444977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,8,1,128,1,float16,float16,0,0.0234400009115537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,8,2,128,1,float16,fp8,0,0.02348266790310542
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,float16,0,0.028725333511829376
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,8,4,128,1,float16,fp8,0,0.027274665733178455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,float16,0,0.02736533433198929
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,8,8,128,1,float16,fp8,0,0.02733866622050603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.018933333456516266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,float16,0,0.017269333203633625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,float16,0,0.016224000602960587
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,8,1,128,1,float16,fp8,0,0.01533866673707962
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,float16,0,0.0169813334941864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,8,2,128,1,float16,fp8,0,0.015317333241303762
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,fp8,0,0.018789333601792652
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,8,4,128,1,float16,float16,0,0.017386666188637417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,fp8,0,0.01811733345190684
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,8,8,128,1,float16,float16,0,0.017887999614079792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,float16,0,0.013093333691358566
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.01322666679819425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,float16,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,8,1,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,float16,0,0.01320533330241839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,8,2,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,float16,0,0.013034666577974955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,8,4,128,1,float16,fp8,0,0.01320533330241839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,float16,0,0.013183999806642532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,8,8,128,1,float16,fp8,0,0.013183999806642532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,float16,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,float16,0,0.012821332861979803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,float16,0,0.012800000607967377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,8,1,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,8,2,128,1,float16,fp8,0,0.012842666357755661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,float16,0,0.012837332983811697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,8,4,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,8,8,128,1,float16,fp8,0,0.013258667041858038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,float16,0,0.012784000486135483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,8,1,128,1,float16,float16,0,0.011328000575304031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,float16,0,0.011242666592200598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,8,2,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,float16,0,0.012495999534924826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,8,4,128,1,float16,fp8,0,0.012693333129088083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,float16,0,0.01099733387430509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,8,8,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.012693333129088083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,float16,0,0.012847999731699625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,float16,0,0.010954666882753372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,8,1,128,1,float16,fp8,0,0.011221333096424738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,float16,0,0.01119999960064888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,8,2,128,1,float16,fp8,0,0.012831999609867731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,float16,0,0.010949333508809408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,8,4,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,float16,0,0.01129066695769628
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,8,8,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.012757333616415659
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,float16,0,0.011242666592200598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,8,1,128,1,float16,float16,0,0.011066666493813196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,float16,0,0.012842666357755661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,8,2,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,fp8,0,0.011061333119869232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,8,4,128,1,float16,float16,0,0.011776000261306763
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,8,8,128,1,float16,float16,0,0.012794667234023413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,0,0.01110400011142095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,0,0.011450666934251785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,0,0.015008000036080679
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,0,0.014773332824309668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,0,0.019199999670187633
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.017429333180189133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,0,0.019050666441520054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,0,0.017360000560681026
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,0,0.010922666639089584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,0,0.012773333738247553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,0,0.011365332951148352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,0,0.014906667172908783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,0,0.013386666774749756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,0,0.01899733394384384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,0,0.017114666601022083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,0,0.015013333410024643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.013338666409254074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,0,0.010949333508809408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,0,0.010768000036478043
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,0,0.011141333729028702
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,0,0.011018666128317514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,0,0.014965333044528961
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,0,0.010965333630641302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.011493333925803503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,0,0.010650667051474253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,0,0.009136000027259191
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,0,0.009114666531483332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,0,0.010703999549150467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,0,0.011120000233252844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,0,0.010944000134865442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,0,0.012117333710193634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,0,0.010949333508809408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,0,0.009008000294367472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,0,0.010885333021481832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,0,0.009557333464423815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,0,0.01062400018175443
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,0,0.012448000411192576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,0,0.010847999403874079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,0,0.01108266661564509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,0,0.010714666297038397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,0,0.009488000224033991
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,0,0.009413333609700203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.011237333218256632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,0,0.009189333145817121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,0,0.01022933361430963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,0,0.010591999938090643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,0,0.009482666850090027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.010965333630641302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,0,0.00890666681031386
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,0,0.010757333288590113
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,0,0.008997333546479544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,0,0.010922666639089584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,0,0.009098666409651438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,0,0.011221333096424738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,0,0.009269333134094873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.010677333921194077
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,0,0.008933333059151968
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,0,0.010933333386977514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,0,0.010650667051474253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,0,0.008922666932145754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,0,0.008949333180983862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,0,0.010661333799362183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,0,0.010885333021481832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,0,0.010757333288590113
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,0,0.008821333448092142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,0,0.009183999771873156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,0,0.008826666822036108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.010938666760921478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,0,0.011077333241701126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,0,0.008922666932145754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,0,0.009279999881982803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,0,0.0107893335322539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,0,0.010672000547250112
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,0,0.010949333508809408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,0,0.009125333279371262
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,fp8,0,1.5108906428019206
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,4,1,128,1,float16,float16,0,1.7174132664998372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,float16,0,1.8509386380513508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,4,2,128,1,float16,fp8,0,1.6654507319132488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,1.1460533142089844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,float16,0,1.3336586952209473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,float16,0,0.942250649134318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,4,1,128,1,float16,fp8,0,0.822271982828776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,fp8,0,0.9057173728942871
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,4,2,128,1,float16,float16,0,1.0159520308176677
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,fp8,0,1.2880853017171223
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,4,4,128,1,float16,float16,0,1.4539626439412434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,0.6613759994506836
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,float16,0,0.7250293095906576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,float16,0,0.5436746676762899
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,4,1,128,1,float16,fp8,0,0.47330133120218915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,float16,0,0.5839306513468424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,4,2,128,1,float16,fp8,0,0.5186506509780884
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,float16,0,0.750063975652059
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,4,4,128,1,float16,fp8,0,0.6727733612060547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,0.3834506670633952
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,float16,0,0.4232906500498454
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,float16,0,0.33872000376383465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,4,1,128,1,float16,fp8,0,0.3020373384157817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,float16,0,0.34761067231496173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,4,2,128,1,float16,fp8,0,0.32281599442164105
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,float16,0,0.41677868366241455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,4,4,128,1,float16,fp8,0,0.38364799817403156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,float16,0,1.0483840306599934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,4,1,128,1,float16,fp8,0,0.9194400310516357
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,float16,0,1.153882662455241
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,4,2,128,1,float16,fp8,0,1.0551520188649495
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,float16,0,0.8687146504720052
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,0.8372213045756022
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,float16,0,0.5909973382949829
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,4,1,128,1,float16,fp8,0,0.5119946797688802
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,float16,0,0.6347466707229614
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,4,2,128,1,float16,fp8,0,0.5785760084788004
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,float16,0,0.867957353591919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,4,4,128,1,float16,fp8,0,0.832757314046224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,float16,0,0.4893653392791748
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,0.45810667673746747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,float16,0,0.34122665723164874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,4,1,128,1,float16,fp8,0,0.3100266655286153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,float16,0,0.3638933499654134
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,4,2,128,1,float16,fp8,0,0.32705066601435345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,float16,0,0.4910080035527547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,4,4,128,1,float16,fp8,0,0.4620853265126546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,float16,0,0.26051199436187744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,0.2585120002428691
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,float16,0,0.21577600638071695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,float16,0,0.2262186606725057
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,4,1,128,1,float16,fp8,0,0.1966773271560669
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,4,2,128,1,float16,fp8,0,0.21100266774495444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,float16,0,0.2596319913864136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,4,4,128,1,float16,fp8,0,0.2590986688931783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,fp8,0,0.6844800313313802
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,4,1,128,1,float16,float16,0,0.7764800389607748
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,float16,0,0.859328031539917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,4,2,128,1,float16,fp8,0,0.7842666308085123
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,float16,0,0.6624266703923544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,0.6068106492360433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,fp8,0,0.3784960110982259
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,4,1,128,1,float16,float16,0,0.43703468640645343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,float16,0,0.4797066847483317
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,4,2,128,1,float16,fp8,0,0.4426720142364502
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,fp8,0,0.6285333236058553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,4,4,128,1,float16,float16,0,0.6663039922714233
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,float16,0,0.37759466965993244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,float16,0,0.2610986630121867
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,4,1,128,1,float16,fp8,0,0.23786665995915732
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,0.3342133363087972
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,float16,0,0.2791093389193217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,4,2,128,1,float16,fp8,0,0.25808533032735187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,float16,0,0.38394665718078613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,4,4,128,1,float16,fp8,0,0.34085333347320557
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,float16,0,0.19722666343053183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,0.19640000661214194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,float16,0,0.15871466199556986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,4,1,128,1,float16,fp8,0,0.14476266503334045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,float16,0,0.167087992032369
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,4,2,128,1,float16,fp8,0,0.15626133481661478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,float16,0,0.19581333796183267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,4,4,128,1,float16,fp8,0,0.19620800018310547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,float16,0,0.9810506502787272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,4,1,128,1,float16,fp8,0,0.8710453510284424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,float16,0,1.1189173062642415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,4,2,128,1,float16,fp8,0,1.0385440190633137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,0.8174933592478434
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,float16,0,0.8790187040964762
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,float16,0,0.5398293336232504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,4,1,128,1,float16,fp8,0,0.4794400135676066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,float16,0,0.6054133176803589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,4,2,128,1,float16,fp8,0,0.560149351755778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,float16,0,0.8839093049367269
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,4,4,128,1,float16,fp8,0,0.937882661819458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,float16,0,0.49007999897003174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,0.48794134457906085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,float16,0,0.30662933985392254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,4,1,128,1,float16,fp8,0,0.2714986602465312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,float16,0,0.34907201925913495
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,4,2,128,1,float16,fp8,0,0.3147520025571187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,float16,0,0.490938663482666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,4,4,128,1,float16,fp8,0,0.4580906629562378
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,0.25470399856567383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,float16,0,0.27591999371846515
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,float16,0,0.19022399187088013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,4,1,128,1,float16,fp8,0,0.17522132396697998
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,float16,0,0.2021226684252421
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,4,2,128,1,float16,fp8,0,0.19156799713770548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,float16,0,0.2755039930343628
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,0.15270933508872986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,float16,0,0.14871467153231302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,4,4,128,1,float16,fp8,0,0.25441600879033405
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,float16,0,0.11761066317558289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,4,1,128,1,float16,fp8,0,0.11194133758544922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,float16,0,0.12556800246238708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,4,2,128,1,float16,fp8,0,0.12016533811887105
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,float16,0,0.14712533354759216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,4,4,128,1,float16,fp8,0,0.1523360013961792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,float16,0,0.6153546571731567
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,4,1,128,1,float16,fp8,0,0.5519466797510783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,float16,0,0.7206559975941976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,4,2,128,1,float16,fp8,0,0.6783040364583334
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,0.559829314549764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,float16,0,0.6058453321456909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,float16,0,0.34168001015981037
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,4,1,128,1,float16,fp8,0,0.30878400802612305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,float16,0,0.39667733510335285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,4,2,128,1,float16,fp8,0,0.3707306782404582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,float16,0,0.3429226477940877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,fp8,0,0.5875360171000162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,4,4,128,1,float16,float16,0,0.5998133420944214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,float16,0,0.1938613255818685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,4,1,128,1,float16,fp8,0,0.17759466171264648
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,0.33472001552581787
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,fp8,0,0.20035733779271445
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,4,2,128,1,float16,float16,0,0.222762664159139
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,float16,0,0.3424319823582967
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,float16,0,0.1689173380533854
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,4,4,128,1,float16,fp8,0,0.34513068199157715
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,0.17744000752766928
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,float16,0,0.12377599875132243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,4,1,128,1,float16,fp8,0,0.11427199840545654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,float16,0,0.1334879994392395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,4,2,128,1,float16,fp8,0,0.1295413374900818
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,fp8,0,0.17701866229375204
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,4,4,128,1,float16,float16,0,0.1699893275896708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,float16,0,0.11170666416486104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,float16,0,0.08885866403579712
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.11575466394424438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,4,1,128,1,float16,fp8,0,0.08684266606966655
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,float16,0,0.09344533085823059
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,4,2,128,1,float16,fp8,0,0.09297600388526917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,float16,0,0.11179199814796448
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,4,4,128,1,float16,fp8,0,0.11558399597803752
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,float16,0,0.6071626742680868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,4,1,128,1,float16,fp8,0,0.5512106815973917
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,float16,0,0.7410079638163248
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,float16,0,0.33027732372283936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,float16,0,0.6786666711171468
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,4,2,128,1,float16,fp8,0,0.7162026564280192
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,0.6218613386154175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,4,1,128,1,float16,fp8,0,0.30204800764719647
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,float16,0,0.3919999996821086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,4,2,128,1,float16,fp8,0,0.38131733735402423
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,float16,0,0.6895999908447266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,float16,0,0.3667893409729004
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,float16,0,0.18117332458496094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,4,4,128,1,float16,fp8,0,0.7292906443277994
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,0.42236268520355225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,4,1,128,1,float16,fp8,0,0.1646719972292582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,fp8,0,0.2018666664759318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,4,2,128,1,float16,float16,0,0.23070400953292847
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,float16,0,0.39160001277923584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,4,4,128,1,float16,fp8,0,0.3638453483581543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.1871359944343567
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,float16,0,0.20480533440907797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,fp8,0,0.10585066676139832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,4,1,128,1,float16,float16,0,0.11190932989120483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,float16,0,0.12346667051315308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,4,2,128,1,float16,fp8,0,0.12468799948692322
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,float16,0,0.20028799772262573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,4,4,128,1,float16,fp8,0,0.18275733788808188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.10826667149861653
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,float16,0,0.0979253351688385
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,float16,0,0.06841599941253662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,fp8,0,0.07786133388678233
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,4,2,128,1,float16,float16,0,0.07447466750939687
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,4,1,128,1,float16,fp8,0,0.06817600131034851
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,float16,0,0.09795199831326802
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,4,4,128,1,float16,fp8,0,0.10947733124097188
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,float16,0,0.07667199770609538
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.08083199958006541
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,float16,0,0.062080000837643944
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,4,1,128,1,float16,fp8,0,0.06241600215435028
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,float16,0,0.06624533236026764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,4,2,128,1,float16,fp8,0,0.06646400193373363
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,float16,0,0.07634666562080383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,4,4,128,1,float16,fp8,0,0.08111999928951263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,float16,0,0.38975465297698975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,4,1,128,1,float16,fp8,0,0.3619306484858195
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,float16,0,0.4990080197652181
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,4,2,128,1,float16,fp8,0,0.4918186664581299
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,fp8,0,0.1999359925587972
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,4,1,128,1,float16,float16,0,0.21850132942199707
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,0.5162080128987631
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,float16,0,0.5209493239720663
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,float16,0,0.26688534021377563
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,4,2,128,1,float16,fp8,0,0.26475199063618976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,float16,0,0.4957386652628581
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,4,4,128,1,float16,fp8,0,0.5654826561609904
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,0.2638826568921407
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,float16,0,0.27146132787068683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,float16,0,0.11615467071533203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,4,1,128,1,float16,fp8,0,0.1123466690381368
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,float16,0,0.14507200320561728
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,4,2,128,1,float16,fp8,0,0.13366933663686117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,float16,0,0.27005332708358765
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,4,4,128,1,float16,fp8,0,0.277402659257253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,float16,0,0.12243733803431193
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.13437333703041077
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,float16,0,0.07496533294518788
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,4,1,128,1,float16,fp8,0,0.07246933380762736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,float16,0,0.08664533495903015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,4,2,128,1,float16,fp8,0,0.08710933725039165
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,float16,0,0.12038933237393697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,4,4,128,1,float16,fp8,0,0.13454399506251016
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,float16,0,0.07515733440717061
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.0828906645377477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,float16,0,0.05425066749254862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,4,1,128,1,float16,fp8,0,0.053743998209635414
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,float16,0,0.058149332801500954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,4,2,128,1,float16,fp8,0,0.060032000144322716
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,float16,0,0.07494933406511943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,4,4,128,1,float16,fp8,0,0.08265600105126698
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,float16,0,0.06030400097370148
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.06414933502674103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,float16,0,0.04981866478919983
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,4,1,128,1,float16,fp8,0,0.05004799862702688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,float16,0,0.052202666799227394
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,4,2,128,1,float16,fp8,0,0.052757332722345986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,float16,0,0.06038400034109751
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,4,4,128,1,float16,fp8,0,0.0644160012404124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,float16,0,0.4127093156178792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,4,1,128,1,float16,fp8,0,0.38736534118652344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,float16,0,0.5392106771469116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,4,2,128,1,float16,fp8,0,0.5493866602579752
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,float16,0,0.565775990486145
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,0.5766986608505249
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,float16,0,0.22069867451985678
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,4,1,128,1,float16,fp8,0,0.20827200015385947
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,float16,0,0.28693334261576336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,float16,0,0.5689813296000162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,4,4,128,1,float16,fp8,0,0.5499786535898844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,4,2,128,1,float16,fp8,0,0.2964373429616292
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,float16,0,0.29978134234746295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,float16,0,0.12007466952006023
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,4,1,128,1,float16,fp8,0,0.10809600353240967
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,0.307093342145284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,float16,0,0.1653279960155487
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,4,2,128,1,float16,fp8,0,0.14797332882881165
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,float16,0,0.31571199496587116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,float16,0,0.16711467504501343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,4,4,128,1,float16,fp8,0,0.308133323987325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.14390400052070618
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,float16,0,0.07036266724268596
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,4,1,128,1,float16,fp8,0,0.07069866855939229
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,float16,0,0.08407466610272725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,4,2,128,1,float16,fp8,0,0.08866133292516072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,fp8,0,0.15385599931081137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,float16,0,0.07292800148328145
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,4,4,128,1,float16,float16,0,0.1633333365122477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.08658132950464885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,float16,0,0.044256001710891724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,4,1,128,1,float16,fp8,0,0.044309332966804504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,float16,0,0.050069332122802734
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,4,2,128,1,float16,fp8,0,0.054560000697771706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,float16,0,0.07238933444023132
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,4,4,128,1,float16,fp8,0,0.08506666620572408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,float16,0,0.05207466582457224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.060032000144322716
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,float16,0,0.039520000418027244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,4,1,128,1,float16,fp8,0,0.03963200002908707
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,float16,0,0.04181866844495138
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,4,2,128,1,float16,fp8,0,0.04394133388996124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,fp8,0,0.059792002042134605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,4,4,128,1,float16,float16,0,0.05411200225353241
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,float16,0,0.044138665000597634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.04614933331807455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,float16,0,0.03753600021203359
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,4,1,128,1,float16,fp8,0,0.03736533224582672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,float16,0,0.03764266769091288
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,4,2,128,1,float16,fp8,0,0.03947199881076813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,fp8,0,0.048021331429481506
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,4,4,128,1,float16,float16,0,0.04402133325735728
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,float16,0,0.27645333607991535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,4,1,128,1,float16,fp8,0,0.26401599248250324
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,float16,0,0.3743893305460612
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,4,2,128,1,float16,fp8,0,0.39052800337473553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,float16,0,0.44840534528096515
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,0.4578666687011719
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,float16,0,0.15339199701944986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,4,1,128,1,float16,fp8,0,0.13798399766286215
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,float16,0,0.2010506590207418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,4,2,128,1,float16,fp8,0,0.20518400271733603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,float16,0,0.4197760025660197
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,4,4,128,1,float16,fp8,0,0.459173321723938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,float16,0,0.22433066368103027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.2317813237508138
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,float16,0,0.07973866661389668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,4,1,128,1,float16,fp8,0,0.08065600196520488
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,float16,0,0.10945600271224976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,4,2,128,1,float16,fp8,0,0.10146133104960124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,float16,0,0.22927467028299967
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,4,4,128,1,float16,fp8,0,0.23903467257817587
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,float16,0,0.09526399771372478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.11343466242154439
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,float16,0,0.052095999320348106
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,4,1,128,1,float16,fp8,0,0.05186133086681366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,float16,0,0.06162666777769724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,4,2,128,1,float16,fp8,0,0.06617600222428639
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,float16,0,0.09714133540789287
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,4,4,128,1,float16,fp8,0,0.1139359970887502
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,float16,0,0.05618133147557577
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.06653333206971486
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,float16,0,0.03568000098069509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,4,1,128,1,float16,fp8,0,0.03565333286921183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,float16,0,0.04058133314053217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,4,2,128,1,float16,fp8,0,0.04388799766699473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,float16,0,0.05596800148487091
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,4,4,128,1,float16,fp8,0,0.06654400130112965
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,float16,0,0.04358399907747904
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,float16,0,0.032613334556420646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.047781333327293396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,4,1,128,1,float16,fp8,0,0.03335466732581457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,float16,0,0.035599999129772186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,4,2,128,1,float16,fp8,0,0.03566933423280716
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,float16,0,0.043562665581703186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,4,4,128,1,float16,fp8,0,0.0476800004641215
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,float16,0,0.03577066709597906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.037658666570981346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,float16,0,0.031343999008337654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,4,1,128,1,float16,fp8,0,0.03124266614516576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,float16,0,0.03190933416287104
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,4,2,128,1,float16,fp8,0,0.03344533344109853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,float16,0,0.035573333501815796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,4,4,128,1,float16,fp8,0,0.03794133414824804
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,float16,0,0.3124693234761556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,4,1,128,1,float16,fp8,0,0.3022986650466919
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,float16,0,0.4391520023345947
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,float16,0,0.5599413315455118
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,0.6386186679204305
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,4,2,128,1,float16,fp8,0,0.4644800027211507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,fp8,0,0.16063466668128967
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,4,1,128,1,float16,float16,0,0.1695680022239685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,float16,0,0.2323519984881083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,4,2,128,1,float16,fp8,0,0.247488001982371
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,float16,0,0.26394667228062946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,float16,0,0.5688000122706095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,4,4,128,1,float16,fp8,0,0.48598400751749676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.30246933301289874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,float16,0,0.08698667089144389
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,fp8,0,0.1267519990603129
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,4,1,128,1,float16,fp8,0,0.08060800035794576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,4,2,128,1,float16,float16,0,0.13159466783205667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,float16,0,0.264789342880249
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,float16,0,0.14079466462135315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.13477333386739096
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,4,4,128,1,float16,fp8,0,0.2977386713027954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,float16,0,0.05202133456865946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,4,1,128,1,float16,fp8,0,0.05314133564631144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,float16,0,0.06403199831644694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,4,2,128,1,float16,fp8,0,0.06958400209744771
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,float16,0,0.060133333007494606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,fp8,0,0.12943466504414877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,4,4,128,1,float16,float16,0,0.1360373298327128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.07445866862932841
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,float16,0,0.03136000037193298
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,float16,0,0.0394400010506312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,4,1,128,1,float16,fp8,0,0.03332799921433131
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,4,2,128,1,float16,fp8,0,0.04306666553020477
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,float16,0,0.06017066538333893
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.048245335618654885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,float16,0,0.04170133173465729
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,4,4,128,1,float16,fp8,0,0.07445333401362102
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,float16,0,0.027232001225153606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,4,1,128,1,float16,fp8,0,0.027514666318893433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,float16,0,0.03140799949566523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,4,2,128,1,float16,fp8,0,0.033488000432650246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,float16,0,0.04164266586303711
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,4,4,128,1,float16,fp8,0,0.047983999053637184
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,float16,0,0.03319466610749563
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.035802667339642845
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,float16,0,0.025301332275072735
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,4,1,128,1,float16,fp8,0,0.025450666745503742
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,float16,0,0.027429332335789997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,4,2,128,1,float16,fp8,0,0.027295999228954315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,float16,0,0.03211733450492223
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,4,4,128,1,float16,fp8,0,0.035749333600203194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.029338667790095013
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,float16,0,0.027402666707833607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,float16,0,0.02521066615978877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,4,1,128,1,float16,fp8,0,0.023391999304294586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,float16,0,0.025621332228183746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,4,2,128,1,float16,fp8,0,0.025573333104451496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,float16,0,0.027402666707833607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,4,4,128,1,float16,fp8,0,0.02959466725587845
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,fp8,0,0.25814932584762573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,4,1,128,1,float16,float16,0,0.2678613265355428
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,fp8,0,0.4215039809544881
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,4,2,128,1,float16,float16,0,0.40305598576863605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,float16,0,0.5413866837819418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.6104160149892172
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,float16,0,0.14401599764823914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,4,1,128,1,float16,fp8,0,0.1405173341433207
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,float16,0,0.20997865994771323
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,4,2,128,1,float16,fp8,0,0.22327999273935953
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,float16,0,0.5430613358815511
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,float16,0,0.26820266246795654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,4,4,128,1,float16,fp8,0,0.46401600042978924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.28305600086847943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,fp8,0,0.06637866795063019
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,4,1,128,1,float16,float16,0,0.0693280001481374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,float16,0,0.11926933129628499
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,4,2,128,1,float16,fp8,0,0.11156266927719116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,float16,0,0.12166933218638103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,float16,0,0.26656534274419147
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,4,4,128,1,float16,fp8,0,0.25330666700998944
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.12149866422017415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,float16,0,0.04171733558177948
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,4,1,128,1,float16,fp8,0,0.043824002146720886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,float16,0,0.05412800113360087
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,4,2,128,1,float16,fp8,0,0.06185600161552429
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,float16,0,0.1225333313147227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,4,4,128,1,float16,fp8,0,0.12206400434176128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,float16,0,0.054192001620928444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.06844266752401988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,float16,0,0.02700799951950709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,4,1,128,1,float16,fp8,0,0.027424000203609467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,float16,0,0.032816000282764435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,4,2,128,1,float16,fp8,0,0.037861332297325134
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,float16,0,0.054048001766204834
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,4,4,128,1,float16,fp8,0,0.06883733471234639
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,float16,0,0.035749333600203194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.04190933207670847
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,float16,0,0.021151999632517498
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,4,1,128,1,float16,fp8,0,0.023413332800070446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,float16,0,0.02513599892457326
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,4,2,128,1,float16,fp8,0,0.027477333943049114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,float16,0,0.03563733398914337
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,4,4,128,1,float16,fp8,0,0.04182933270931244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,float16,0,0.027274665733178455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.029616000751654308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,float16,0,0.019440000255902607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,4,1,128,1,float16,fp8,0,0.019882666567961376
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,fp8,0,0.023077333966890972
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,4,2,128,1,float16,float16,0,0.02181333303451538
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,float16,0,0.027349332968393963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,4,4,128,1,float16,fp8,0,0.02956266701221466
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,float16,0,0.023013333479563396
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,float16,0,0.01916266605257988
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,4,1,128,1,float16,fp8,0,0.01912533367673556
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.024351999163627625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,float16,0,0.019354666272799175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,4,2,128,1,float16,fp8,0,0.019280000279347103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,float16,0,0.023168000082174938
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,float16,0,0.019365333020687103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,4,4,128,1,float16,fp8,0,0.024330665667851765
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.02102400114138921
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,float16,0,0.01777600000301997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,4,1,128,1,float16,fp8,0,0.018906666586796444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,float16,0,0.019519999623298645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,4,2,128,1,float16,fp8,0,0.017312000195185345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,float16,0,0.019146667172511418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,4,4,128,1,float16,fp8,0,0.02041600023706754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,fp8,0,0.1197706659634908
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,4,1,128,1,float16,float16,0,0.12315733234087627
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,fp8,0,0.20777066548665366
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,4,2,128,1,float16,float16,0,0.19310933351516724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,float16,0,0.2577173312505086
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.24609067042668661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,float16,0,0.06381333371003468
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,4,1,128,1,float16,fp8,0,0.06012799839178721
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,float16,0,0.109333336353302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,4,2,128,1,float16,fp8,0,0.09910933176676433
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,float16,0,0.2611520091692607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,4,4,128,1,float16,fp8,0,0.2746079961458842
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,float16,0,0.11220266421635945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.11660266915957133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,float16,0,0.03569599986076355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,4,1,128,1,float16,fp8,0,0.03792533278465271
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,float16,0,0.04788800080617269
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,4,2,128,1,float16,fp8,0,0.05725333094596863
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,float16,0,0.051962668697039284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.06634666522343953
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,float16,0,0.12609066565831503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,4,4,128,1,float16,fp8,0,0.1167626678943634
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,float16,0,0.023152001202106476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,4,1,128,1,float16,fp8,0,0.025487999121348064
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,float16,0,0.02957333376010259
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,4,2,128,1,float16,fp8,0,0.03565333286921183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,fp8,0,0.06614933411280315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,4,4,128,1,float16,float16,0,0.05132266879081726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,float16,0,0.03341866781314214
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.03961600114901861
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,float16,0,0.01889066646496455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,float16,0,0.02311466634273529
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,4,1,128,1,float16,fp8,0,0.019482667247454327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,4,2,128,1,float16,fp8,0,0.025216000775496166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.02735466758410136
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,float16,0,0.023306667804718018
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,fp8,0,0.039701332648595176
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,4,4,128,1,float16,float16,0,0.03333866596221924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,float16,0,0.017221332838137943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,4,1,128,1,float16,fp8,0,0.017024000485738117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,fp8,0,0.01942933350801468
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,4,2,128,1,float16,float16,0,0.01894933357834816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,float16,0,0.02345066765944163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.021397332350413006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,4,4,128,1,float16,fp8,0,0.027493332823117573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,float16,0,0.019445333629846573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,float16,0,0.015194666882356008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,4,1,128,1,float16,fp8,0,0.015594666202863058
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,fp8,0,0.01732800031701724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,4,2,128,1,float16,float16,0,0.017130666722853977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,float16,0,0.019088000059127808
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,4,4,128,1,float16,fp8,0,0.021344001094500225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,float16,0,0.016970666746298473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,float16,0,0.015274666249752045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,4,1,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.01700266698996226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,float16,0,0.015386667102575302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,4,2,128,1,float16,fp8,0,0.01533866673707962
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,float16,0,0.01706133286158244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,4,4,128,1,float16,fp8,0,0.017423999806245167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,float16,0,0.015135999768972397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.015013333410024643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,float16,0,0.015216000378131866
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,4,1,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,float16,0,0.015194666882356008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,4,2,128,1,float16,fp8,0,0.015344000111023584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,float16,0,0.015498666713635126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,4,4,128,1,float16,fp8,0,0.014975999792416891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,float16,0,0.0695306658744812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,4,1,128,1,float16,fp8,0,0.06419200201829274
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,float16,0,0.11272000273068745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,float16,0,0.1328159968058268
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.11571199695269267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,4,2,128,1,float16,fp8,0,0.10737599929173787
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,fp8,0,0.04058666775623957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,4,1,128,1,float16,float16,0,0.03882133215665817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,float16,0,0.04991999765237173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,4,2,128,1,float16,fp8,0,0.05884799857934316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,float16,0,0.05226666728655497
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,float16,0,0.12736533085505167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.06650133430957794
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,float16,0,0.023226665953795116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,4,4,128,1,float16,fp8,0,0.11446932951609294
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,4,1,128,1,float16,fp8,0,0.025285333395004272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,float16,0,0.03177600105603536
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,4,2,128,1,float16,fp8,0,0.03551466763019562
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,float16,0,0.03164266546567281
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,float16,0,0.05241066714127859
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,4,4,128,1,float16,fp8,0,0.06628799935181935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.03763733307520548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,float16,0,0.01695466662446658
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,float16,0,0.01913600042462349
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,4,1,128,1,float16,fp8,0,0.01743999992807706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,4,2,128,1,float16,fp8,0,0.02329600105683009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,float16,0,0.03148799886306127
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,float16,0,0.021375998854637146
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,4,4,128,1,float16,fp8,0,0.03772799919048945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.025258667767047882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,float16,0,0.015024000157912573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,4,1,128,1,float16,fp8,0,0.015439999600251516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,float16,0,0.01504533365368843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,4,2,128,1,float16,fp8,0,0.017157333592573803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,float16,0,0.021407999098300934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,float16,0,0.016842667013406754
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,4,4,128,1,float16,fp8,0,0.02532800038655599
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.018522666146357853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,4,1,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,float16,0,0.014954666296641031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,4,2,128,1,float16,fp8,0,0.014970666418472925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,float16,0,0.017008000363906223
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,4,4,128,1,float16,fp8,0,0.018485333770513535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.015066667149464289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,float16,0,0.014975999792416891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,4,1,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,4,2,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,float16,0,0.01479999969402949
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,4,4,128,1,float16,fp8,0,0.015200000256299973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,4,1,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,float16,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,4,2,128,1,float16,fp8,0,0.013295999417702356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,float16,0,0.013189333180586496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,4,4,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,float16,0,0.013183999806642532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,4,1,128,1,float16,fp8,0,0.012944000462690989
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,float16,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,4,2,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,float16,0,0.013162666310866674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,4,4,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,fp8,0,0.04785066843032837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,4,1,128,1,float16,float16,0,0.048250665267308555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,float16,0,0.062218666076660156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,4,2,128,1,float16,fp8,0,0.06649599969387054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,float16,0,0.05806399881839752
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,float16,0,0.029685333371162415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.07227733234564464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,4,1,128,1,float16,fp8,0,0.03143466760714849
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,float16,0,0.036533333361148834
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,4,2,128,1,float16,fp8,0,0.04125333329041799
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,float16,0,0.05825600028038025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,4,4,128,1,float16,fp8,0,0.07135466734568278
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.04121600091457367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,float16,0,0.03331200033426285
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,float16,0,0.019253333409627277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,4,1,128,1,float16,fp8,0,0.02102400114138921
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,float16,0,0.02295999974012375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,4,2,128,1,float16,fp8,0,0.02552533398071925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,float16,0,0.021530665457248688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,float16,0,0.03375466664632162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,4,4,128,1,float16,fp8,0,0.03982933362325033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.025450666745503742
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,float16,0,0.014943999548753103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,4,1,128,1,float16,fp8,0,0.015077333897352219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,float16,0,0.016965333372354507
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,4,2,128,1,float16,fp8,0,0.0173333336909612
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,float16,0,0.021210665504137676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,4,4,128,1,float16,fp8,0,0.025205334027608235
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,float16,0,0.016949333250522614
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.019050666441520054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,4,1,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,float16,0,0.013343999783198038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,4,2,128,1,float16,fp8,0,0.015274666249752045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,float16,0,0.017136000096797943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,4,4,128,1,float16,fp8,0,0.018992000569899876
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,float16,0,0.013104000439246496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.014864000181357065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,float16,0,0.013157332936922709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,4,1,128,1,float16,fp8,0,0.01258133351802826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,float16,0,0.01293333371480306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,4,2,128,1,float16,fp8,0,0.013317332913478216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,4,4,128,1,float16,fp8,0,0.014720000326633453
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,float16,0,0.012885333349307379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,4,1,128,1,float16,float16,0,0.012762666990359625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,float16,0,0.013002666334311167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,4,2,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,float16,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,4,4,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,float16,0,0.012938667088747025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.012906666845083237
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,float16,0,0.012784000486135483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,4,1,128,1,float16,fp8,0,0.012746666868527731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,float16,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,4,2,128,1,float16,fp8,0,0.01166933278242747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,float16,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,4,4,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,float16,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.012175999581813812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,float16,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,4,1,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,float16,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,4,2,128,1,float16,fp8,0,0.011296000331640244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,float16,0,0.012810666114091873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,4,4,128,1,float16,fp8,0,0.012917333592971167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,fp8,0,0.03968533376852671
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,4,1,128,1,float16,float16,0,0.04192000130812327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,float16,0,0.04828799764315287
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,4,2,128,1,float16,fp8,0,0.04850666721661886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,float16,0,0.039893334110577904
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.04587199787298838
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,float16,0,0.02553066611289978
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,4,1,128,1,float16,fp8,0,0.025285333395004272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,float16,0,0.029450667401154835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,4,2,128,1,float16,fp8,0,0.03141866624355316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,float16,0,0.03976533313592275
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,4,4,128,1,float16,fp8,0,0.04578666885693868
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,float16,0,0.025077333052953083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.027477333943049114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,float16,0,0.017509333789348602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,4,1,128,1,float16,fp8,0,0.01709866647919019
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,float16,0,0.019167999426523846
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,4,2,128,1,float16,fp8,0,0.019274666905403137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,float16,0,0.026650667190551758
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,4,4,128,1,float16,fp8,0,0.027215999861558277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.01882133384545644
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,float16,0,0.017093333105246227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,float16,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,4,1,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,float16,0,0.015002666662136713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,4,2,128,1,float16,fp8,0,0.015103999525308609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,float16,0,0.016970666746298473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,4,4,128,1,float16,fp8,0,0.018986667195955913
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.014997333288192749
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,float16,0,0.014943999548753103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,float16,0,0.012736000120639801
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,4,1,128,1,float16,fp8,0,0.012549333274364471
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,float16,0,0.01209066684047381
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,4,2,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,float16,0,0.014149333039919535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,4,4,128,1,float16,fp8,0,0.015029333531856537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,float16,0,0.011253333340088526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,float16,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,4,1,128,1,float16,fp8,0,0.01258133351802826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,float16,0,0.012831999609867731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,4,2,128,1,float16,fp8,0,0.011071999867757162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,fp8,0,0.01293333371480306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,4,4,128,1,float16,float16,0,0.012826666235923767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,float16,0,0.010970667004585266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,4,1,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,float16,0,0.011285333583752314
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,4,2,128,1,float16,fp8,0,0.012346666306257248
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,4,4,128,1,float16,fp8,0,0.013013333082199097
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,float16,0,0.011322667201360067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,float16,0,0.011231999844312668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,4,1,128,1,float16,fp8,0,0.011450666934251785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,float16,0,0.011242666592200598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,4,2,128,1,float16,fp8,0,0.01190399999419848
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,float16,0,0.011994666109482447
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,4,4,128,1,float16,fp8,0,0.012719999998807907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,float16,0,0.011061333119869232
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.012560000022252401
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,float16,0,0.010938666760921478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,4,1,128,1,float16,fp8,0,0.011226666470368704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,float16,0,0.011205332974592844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,4,2,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,float16,0,0.01126933346192042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,4,4,128,1,float16,fp8,0,0.013007999708255133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,float16,0,0.037861332297325134
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,4,1,128,1,float16,fp8,0,0.03352533280849457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,float16,0,0.04164266586303711
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,float16,0,0.03149333347876867
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.03336533407370249
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,4,2,128,1,float16,fp8,0,0.039605334401130676
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,float16,0,0.02480533222357432
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,4,1,128,1,float16,fp8,0,0.023610666394233704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,float16,0,0.025536000728607178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,4,2,128,1,float16,fp8,0,0.025216000775496166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,float16,0,0.02107733239730199
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,fp8,0,0.03336533407370249
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,4,4,128,1,float16,float16,0,0.03161599983771642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.021221332252025604
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,float16,0,0.01706133286158244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,4,1,128,1,float16,fp8,0,0.016352000335852306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,fp8,0,0.017125333348910015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,4,2,128,1,float16,float16,0,0.017573333034912746
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,float16,0,0.020992000897725422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,4,4,128,1,float16,fp8,0,0.021087999145189922
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,float16,0,0.014874666929244995
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,float16,0,0.013264000415802002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.015253332753976187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,4,1,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,float16,0,0.01314666618903478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,4,2,128,1,float16,fp8,0,0.013056000073750814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,float16,0,0.015024000157912573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,float16,0,0.012341332932313284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,4,4,128,1,float16,fp8,0,0.015082667271296183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,float16,0,0.012741333494583765
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,4,1,128,1,float16,fp8,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,fp8,0,0.012650666137536367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,4,2,128,1,float16,float16,0,0.012938667088747025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,float16,0,0.013114667187134424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,4,4,128,1,float16,fp8,0,0.013157332936922709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,float16,0,0.01129066695769628
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,float16,0,0.013007999708255133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,4,1,128,1,float16,fp8,0,0.011973333855470022
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,float16,0,0.011034666250149408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,4,2,128,1,float16,fp8,0,0.01118933285276095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,4,4,128,1,float16,fp8,0,0.012800000607967377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.01102399950226148
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,float16,0,0.011130666981140772
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,4,1,128,1,float16,float16,0,0.012831999609867731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,float16,0,0.010960000256697336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,4,2,128,1,float16,fp8,0,0.012479999413092932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,float16,0,0.011141333729028702
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.012202666451533636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,4,4,128,1,float16,fp8,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,float16,0,0.011087999989589056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,float16,0,0.01097600037852923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,4,1,128,1,float16,fp8,0,0.012736000120639801
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,fp8,0,0.011296000331640244
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,4,2,128,1,float16,float16,0,0.016069332758585613
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,4,4,128,1,float16,float16,0,0.011071999867757162
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.011834666132926941
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,float16,0,0.011173332730929056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,float16,0,0.012671999633312225
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,4,1,128,1,float16,fp8,0,0.01116266722480456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,float16,0,0.01128000020980835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,4,2,128,1,float16,fp8,0,0.011381333072980246
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,float16,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,4,4,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,0,0.011231999844312668
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,0,0.012629333883523941
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,0,0.01303999995191892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,0,0.014874666929244995
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,0,0.015061333775520325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.013327999661366144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,0,0.010944000134865442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,0,0.010757333288590113
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,0,0.01128000020980835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,0,0.012773333738247553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,0,0.015119999647140503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,0,0.012917333592971167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.011055999745925268
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,0,0.010645333677530289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,0,0.011066666493813196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,0,0.011087999989589056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,0,0.010672000547250112
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,0,0.012250666817029318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,0,0.011029332876205444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,0,0.01071999967098236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.010698666175206503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,0,0.010901333143313726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,0,0.010751999914646149
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,0,0.009039999917149544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,0,0.010805333654085795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,0,0.010863999525705973
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,0,0.010885333021481832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,0,0.011034666250149408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.010784000158309937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,0,0.010255999863147736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,0,0.009103999783595404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,0,0.009866666669646898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,0,0.010970667004585266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,0,0.010666667173306147
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,0,0.010175999874869982
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.010762666662534079
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,0,0.010890666395425797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,0,0.009269333134094873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,0,0.00895999992887179
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,0,0.009509333098928133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,0,0.011141333729028702
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,0,0.009152000149091085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,0,0.009189333145817121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,0,0.009568000212311745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,0,0.008842666943868002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.011007999380429586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,0,0.008992000172535578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,0,0.0107893335322539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,0,0.009130666653315226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,0,0.009813333551088968
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,0,0.010821333775917688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.00897066667675972
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,0,0.009008000294367472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,0,0.010773333410422007
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,0,0.01071999967098236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,0,0.009173333023985228
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,0,0.009002666920423508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,0,0.010687999427318573
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,0,0.011077333241701126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.012133333832025528
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,0,0.008943999807039896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,0,0.01071999967098236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,0,0.008933333059151968
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,0,0.010869332899649939
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,0,0.009253333633144697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,0,0.01081066702802976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,float16,0,0.645306666692098
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,float16,0,0.992415984471639
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16384,2,1,128,1,float16,fp8,0,0.8887253602345785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,0.5940373341242472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,fp8,0,0.506277322769165
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,2,1,128,1,float16,float16,0,0.572165330251058
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,fp8,0,0.5942346652348837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,2,2,128,1,float16,float16,0,0.6342026789983114
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,0.3410986661911011
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,float16,0,0.38654398918151855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,float16,0,0.3503520091374715
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,2,1,128,1,float16,fp8,0,0.31539734204610187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,fp8,0,0.3412426710128784
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,2,2,128,1,float16,float16,0,0.3858026663462321
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,float16,0,0.2355413238207499
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,0.22497600317001343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,float16,0,0.22010666131973267
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,2,1,128,1,float16,fp8,0,0.204693337281545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,float16,0,0.23572800556818643
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,2,2,128,1,float16,fp8,0,0.22459199031194052
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,float16,0,0.42395198345184326
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,float16,0,0.6249759991963705
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,0.3987040122350057
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,12288,2,1,128,1,float16,fp8,0,0.5629013379414877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,float16,0,0.36079998811086017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,2,1,128,1,float16,fp8,0,0.31997867425282794
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,fp8,0,0.39421331882476807
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,2,2,128,1,float16,float16,0,0.42106131712595624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,float16,0,0.23779199520746866
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,0.23051732778549194
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,float16,0,0.22240533431371054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,2,1,128,1,float16,fp8,0,0.2076693375905355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,fp8,0,0.23052799701690674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,float16,0,0.17463467518488565
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,0.17147733767827353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,2,2,128,1,float16,float16,0,0.23717333873112997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,float16,0,0.16637866695721945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,2,1,128,1,float16,fp8,0,0.15569600462913513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,float16,0,0.17584532499313354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,2,2,128,1,float16,fp8,0,0.17059733470280966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,float16,0,0.3303520083427429
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,float16,0,0.47362132867177326
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,10240,2,1,128,1,float16,fp8,0,0.43034132321675617
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,float16,0,0.27243733406066895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,0.29391467571258545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,2,1,128,1,float16,fp8,0,0.2525013287862142
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,float16,0,0.1829920013745626
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,fp8,0,0.2944106658299764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,2,2,128,1,float16,float16,0,0.3208000063896179
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,0.17639466126759848
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,fp8,0,0.15421332915623984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,2,1,128,1,float16,float16,0,0.16597333550453186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,float16,0,0.18222934007644653
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,float16,0,0.14618666966756186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,2,2,128,1,float16,fp8,0,0.17612266540527344
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,fp8,0,0.13288000226020813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,2,1,128,1,float16,float16,0,0.13821867108345032
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.14401066303253174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,fp8,0,0.14435199896494547
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,2,2,128,1,float16,float16,0,0.14620266358057657
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,float16,0,0.5936319828033447
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,float16,0,0.40229864915211994
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,8192,2,1,128,1,float16,fp8,0,0.5442560116449991
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,0.39718401432037354
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,float16,0,0.33742932478586835
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,2,1,128,1,float16,fp8,0,0.3060106635093689
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,float16,0,0.4079626798629761
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,2,2,128,1,float16,fp8,0,0.3906186819076538
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,float16,0,0.24041599035263062
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,0.21236799160639444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,float16,0,0.19941866397857666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,2,1,128,1,float16,fp8,0,0.18768000602722168
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,float16,0,0.23933867613474527
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,float16,0,0.13885866602261862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,2,2,128,1,float16,fp8,0,0.21421867609024048
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.138672004143397
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,float16,0,0.124399999777476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,2,1,128,1,float16,fp8,0,0.11952533324559529
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,float16,0,0.1379146675268809
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,float16,0,0.11913067102432251
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,2,2,128,1,float16,fp8,0,0.138154665629069
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.11926933129628499
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,float16,0,0.1113759974638621
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,2,1,128,1,float16,fp8,0,0.10929600397745769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,fp8,0,0.11932800213495891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,2,2,128,1,float16,float16,0,0.11965333422025044
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,float16,0,0.3852906624476115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,float16,0,0.2770613431930542
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,0.269487996896108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,6144,2,1,128,1,float16,fp8,0,0.35735468069712323
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,float16,0,0.2162933349609375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,2,1,128,1,float16,fp8,0,0.19660800695419312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,float16,0,0.27990933259328205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,2,2,128,1,float16,fp8,0,0.2707146604855855
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,float16,0,0.14667200048764548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,0.1537493367989858
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,float16,0,0.13167466719945273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,2,1,128,1,float16,fp8,0,0.12532800436019897
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,float16,0,0.14643200238545737
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,2,2,128,1,float16,fp8,0,0.15241600076357523
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,float16,0,0.1050933301448822
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.10521066188812256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,float16,0,0.09313066800435384
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,2,1,128,1,float16,fp8,0,0.09293867150942485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,fp8,0,0.10523733496665955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,2,2,128,1,float16,float16,0,0.1046560009320577
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,float16,0,0.09104532996813457
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.09275733431180318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,fp8,0,0.08503466844558716
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,2,1,128,1,float16,float16,0,0.08610666791598003
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,float16,0,0.09140800436337788
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,2,2,128,1,float16,fp8,0,0.09278399745623271
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,float16,0,0.28140799204508465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,0.28750399748484295
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,float16,0,0.3902133305867513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,4096,2,1,128,1,float16,fp8,0,0.36753066380818683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,float16,0,0.2164799968401591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,2,1,128,1,float16,fp8,0,0.20149334271748862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,float16,0,0.2817759911219279
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,2,2,128,1,float16,fp8,0,0.2918826738993327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,float16,0,0.1634666621685028
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.14775466918945312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,float16,0,0.1220266620318095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,2,1,128,1,float16,fp8,0,0.11926933129628499
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,float16,0,0.1637226641178131
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,2,2,128,1,float16,fp8,0,0.14774399995803833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.09305066863695781
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,float16,0,0.08875200152397156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,float16,0,0.0743146687746048
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,2,1,128,1,float16,fp8,0,0.07589866717656453
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,float16,0,0.0885599950949351
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,2,2,128,1,float16,fp8,0,0.09344533085823059
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,float16,0,0.07232533395290375
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.07657066484292348
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,float16,0,0.06529599924882253
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,2,1,128,1,float16,fp8,0,0.06623999774456024
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,float16,0,0.07233599821726482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,2,2,128,1,float16,fp8,0,0.07580266892910004
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,float16,0,0.06435200075308482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.06639466683069865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,float16,0,0.06217599908510844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,2,1,128,1,float16,fp8,0,0.06073066592216492
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,float16,0,0.06492800017197926
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,2,2,128,1,float16,fp8,0,0.06658133367697398
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,float16,0,0.20273600021998087
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,float16,0,0.26079465945561725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,3072,2,1,128,1,float16,fp8,0,0.2536639968554179
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,0.20287466049194336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,float16,0,0.14059199889500937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,2,1,128,1,float16,fp8,0,0.1291253368059794
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,float16,0,0.20416533946990967
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,float16,0,0.09988266229629517
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,2,2,128,1,float16,fp8,0,0.20680000384648642
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.10896533727645874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,float16,0,0.08408000071843465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,2,1,128,1,float16,fp8,0,0.08416533470153809
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,float16,0,0.09841600060462952
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,float16,0,0.06845333178838094
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,2,2,128,1,float16,fp8,0,0.10894933342933655
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.0724533349275589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,float16,0,0.05820799867312113
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,2,1,128,1,float16,fp8,0,0.05794133245944977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,float16,0,0.06768533090750377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,2,2,128,1,float16,fp8,0,0.07435733576615651
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,float16,0,0.05624533196290334
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.060133333007494606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,float16,0,0.05249600112438202
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,2,1,128,1,float16,fp8,0,0.05203733344872793
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,float16,0,0.05665599803129832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,float16,0,0.051685333251953125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,2,2,128,1,float16,fp8,0,0.06035199761390686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.05365866422653198
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,float16,0,0.04969066878159841
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,2,1,128,1,float16,fp8,0,0.04972266654173533
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,float16,0,0.051914667089780174
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,2,2,128,1,float16,fp8,0,0.0543146679798762
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,float16,0,0.21927465995152792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,float16,0,0.2776479919751485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,2048,2,1,128,1,float16,fp8,0,0.2744693358739217
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,0.23155200481414795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,float16,0,0.15506666898727417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,2,1,128,1,float16,fp8,0,0.14033599694569907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,float16,0,0.2278560002644857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,float16,0,0.12210667133331299
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,2,2,128,1,float16,fp8,0,0.23259733120600382
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.10943999886512756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,fp8,0,0.08400000135103862
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,2,1,128,1,float16,float16,0,0.08179200192292531
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,float16,0,0.11865599950154622
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.07025066514809926
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,float16,0,0.06337066491444905
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,2,2,128,1,float16,fp8,0,0.11077333490053813
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,fp8,0,0.05251200000445048
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,float16,0,0.06394133468468984
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,2,1,128,1,float16,float16,0,0.05026666820049286
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,2,2,128,1,float16,fp8,0,0.07044266661008199
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,float16,0,0.05014933149019877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.05365866422653198
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,fp8,0,0.043807998299598694
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,2,1,128,1,float16,float16,0,0.04219200213750204
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,float16,0,0.0498933345079422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,2,2,128,1,float16,fp8,0,0.0539626677831014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,float16,0,0.041663999358812966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.043893332282702126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,float16,0,0.039503999054431915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,2,1,128,1,float16,fp8,0,0.038047999143600464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,float16,0,0.04165333261092504
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,2,2,128,1,float16,fp8,0,0.04427200059096018
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,float16,0,0.03958933303753535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.0376800000667572
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,float16,0,0.0367253323396047
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,2,1,128,1,float16,fp8,0,0.037589333951473236
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,float16,0,0.039461334546407066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,2,2,128,1,float16,fp8,0,0.03940266619126002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.1653439998626709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,float16,0,0.16477866967519125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,float16,0,0.1960373322168986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1536,2,1,128,1,float16,fp8,0,0.1972000002861023
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,float16,0,0.10309333602587382
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,2,1,128,1,float16,fp8,0,0.0960106650988261
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,float16,0,0.1694773236910502
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,2,2,128,1,float16,fp8,0,0.1710240046183268
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,float16,0,0.07506666580835979
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.08782933155695598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,float16,0,0.060080001751581825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,2,1,128,1,float16,fp8,0,0.06366933385531108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,float16,0,0.07472533484299977
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,float16,0,0.05021866659323374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,2,2,128,1,float16,fp8,0,0.08756267031033833
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.05810666580994924
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,fp8,0,0.04154666761557261
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,2,1,128,1,float16,float16,0,0.03998400022586187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,float16,0,0.04966933528582255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,2,2,128,1,float16,fp8,0,0.05658666789531708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,float16,0,0.03952533255020777
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.04375466704368591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,float16,0,0.035429333647092186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,2,1,128,1,float16,fp8,0,0.035546667873859406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,float16,0,0.039919999738534294
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,2,2,128,1,float16,fp8,0,0.04358933369318644
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,float16,0,0.035573333501815796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.0378560001651446
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,float16,0,0.03141866624355316
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,2,1,128,1,float16,fp8,0,0.03332799921433131
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,float16,0,0.03538133452335993
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,2,2,128,1,float16,fp8,0,0.03578133384386698
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,float16,0,0.031498665610949196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.033285332222779594
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,float16,0,0.03151999910672506
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,2,1,128,1,float16,fp8,0,0.03141333411137263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,float16,0,0.03178133318821589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,2,2,128,1,float16,fp8,0,0.033557333052158356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,float16,0,0.18918399016062418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,float16,0,0.22344533602396646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1024,2,1,128,1,float16,fp8,0,0.22694400946299234
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.20377600193023682
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,float16,0,0.12157866358757019
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,2,1,128,1,float16,fp8,0,0.11989866693814595
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,float16,0,0.18845866123835245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,float16,0,0.10080533226331075
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,2,2,128,1,float16,fp8,0,0.2108479936917623
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.09195199608802795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,float16,0,0.061573331554730736
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,2,1,128,1,float16,fp8,0,0.06620799998442332
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,fp8,0,0.09257599711418152
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,float16,0,0.05184000233809153
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.059343998630841575
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,2,2,128,1,float16,float16,0,0.10018666585286458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,float16,0,0.03756800045569738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,2,1,128,1,float16,fp8,0,0.04155733436346054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,float16,0,0.0518453319867452
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,float16,0,0.037674665451049805
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.04192000130812327
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,2,2,128,1,float16,fp8,0,0.06035199761390686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,float16,0,0.03148799886306127
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,2,1,128,1,float16,fp8,0,0.03158933420976003
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,float16,0,0.03771200031042099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,float16,0,0.031370667119820915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.03311466674009959
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,2,2,128,1,float16,fp8,0,0.04207466542720795
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,float16,0,0.027535999814669292
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,2,1,128,1,float16,fp8,0,0.027658666173617046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,float16,0,0.029498666524887085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,float16,0,0.027301333844661713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,2,2,128,1,float16,fp8,0,0.03400533397992452
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.02757333219051361
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,float16,0,0.02526933451493581
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,2,1,128,1,float16,fp8,0,0.025253333151340485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,float16,0,0.02756800005833308
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,2,2,128,1,float16,fp8,0,0.027589333554108936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,float16,0,0.025546667476495106
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.025562666356563568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,float16,0,0.025279998779296875
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,2,1,128,1,float16,fp8,0,0.02345066765944163
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,float16,0,0.02569066733121872
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,2,2,128,1,float16,fp8,0,0.025568000972270966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,float16,0,0.17306133111317953
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.18962132930755615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,fp8,0,0.20327999194463095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,float16,0,0.10841600100199382
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,2,1,128,1,float16,fp8,0,0.10546132922172546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,512,2,1,128,1,float16,float16,0,0.19962666432062784
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,float16,0,0.08996267120043437
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.08905599514643352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,float16,0,0.17238932847976685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,2,2,128,1,float16,fp8,0,0.19578667481740317
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,float16,0,0.0517546683549881
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,2,1,128,1,float16,fp8,0,0.058090666929880776
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,float16,0,0.08991466959317525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,2,2,128,1,float16,fp8,0,0.08481599887212117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,float16,0,0.04623466730117798
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.05376533170541128
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,float16,0,0.031386665999889374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,2,1,128,1,float16,fp8,0,0.035418666899204254
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,float16,0,0.04599999884764353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,2,2,128,1,float16,fp8,0,0.05399466554323832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,float16,0,0.03141333411137263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,float16,0,0.02548266698916753
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.03759466608365377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,2,1,128,1,float16,fp8,0,0.02733866622050603
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,float16,0,0.031370667119820915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,float16,0,0.02518933266401291
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,2,2,128,1,float16,fp8,0,0.03793066740036011
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,float16,0,0.021136000752449036
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.027488000690937042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,2,1,128,1,float16,fp8,0,0.022426667312781017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,float16,0,0.025455998877684276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,float16,0,0.02128533273935318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,2,2,128,1,float16,fp8,0,0.027248000105222065
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.02248000105222066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,float16,0,0.019306667149066925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,2,1,128,1,float16,fp8,0,0.019002666076024372
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,float16,0,0.02128533273935318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,2,2,128,1,float16,fp8,0,0.022490667800108593
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,float16,0,0.019205333044131596
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.019424000134070713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,float16,0,0.01922133316596349
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,2,1,128,1,float16,fp8,0,0.01878400022784869
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,float16,0,0.019386666516462963
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,2,2,128,1,float16,fp8,0,0.01941866676012675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,float16,0,0.018816000471512478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.018874666343132656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,float16,0,0.018853332847356796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,2,1,128,1,float16,fp8,0,0.017407999684413273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,float16,0,0.018826667219400406
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,2,2,128,1,float16,fp8,0,0.018960000326236088
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,float16,0,0.08396800359090169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,float16,0,0.09840533137321472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,256,2,1,128,1,float16,fp8,0,0.09699733058611552
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.07806933422883351
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,float16,0,0.04582933088143667
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,2,1,128,1,float16,fp8,0,0.05259199937184652
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,float16,0,0.08418666323026021
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,float16,0,0.04247466723124186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,float16,0,0.029264000554879505
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,2,2,128,1,float16,fp8,0,0.07732800145943959
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.04974400003751119
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,2,1,128,1,float16,fp8,0,0.03356799980004629
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,float16,0,0.04247466723124186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,float16,0,0.029343999922275543
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,2,2,128,1,float16,fp8,0,0.05080533524354299
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.034645333886146545
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,float16,0,0.02298133323589961
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,2,1,128,1,float16,fp8,0,0.02462933212518692
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,float16,0,0.021594665944576263
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,fp8,0,0.033520000676314034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,2,2,128,1,float16,float16,0,0.029322666426499683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.025370667378107708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,float16,0,0.01887999971707662
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,2,1,128,1,float16,fp8,0,0.019381333142518997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,float16,0,0.02126399924357732
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,2,2,128,1,float16,fp8,0,0.025413334369659424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,float16,0,0.01905599981546402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.019424000134070713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,float16,0,0.017184000462293625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,2,1,128,1,float16,fp8,0,0.01722666621208191
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,float16,0,0.01884799947341283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,2,2,128,1,float16,fp8,0,0.019365333020687103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,float16,0,0.017157333592573803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.0170666662355264
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,float16,0,0.016885332763195038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,2,1,128,1,float16,fp8,0,0.01504533365368843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,float16,0,0.01718933383623759
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,2,2,128,1,float16,fp8,0,0.01714666684468587
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,float16,0,0.015210667004187902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.015034666905800501
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,float16,0,0.014954666296641031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,2,1,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,fp8,0,0.015360000232855478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,2,2,128,1,float16,float16,0,0.015285332997639975
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,float16,0,0.014997333288192749
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,float16,0,0.015317333241303762
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,2,1,128,1,float16,fp8,0,0.015253332753976187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,float16,0,0.01523200049996376
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,2,2,128,1,float16,fp8,0,0.015210667004187902
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,float16,0,0.04365866879622141
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.05228800078233083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,fp8,0,0.05576533575852712
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,128,2,1,128,1,float16,float16,0,0.047728002071380615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,float16,0,0.029530666768550873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,2,1,128,1,float16,fp8,0,0.03358400116364161
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,float16,0,0.02741333345572154
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.03166933357715607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,float16,0,0.043562665581703186
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,2,2,128,1,float16,fp8,0,0.051738664507865906
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,float16,0,0.019152000546455383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,2,1,128,1,float16,fp8,0,0.022778667509555817
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,float16,0,0.027098665634791057
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,2,2,128,1,float16,fp8,0,0.03173866619666418
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,float16,0,0.01905599981546402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.021189334491888683
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,float16,0,0.016901332885026932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,2,1,128,1,float16,fp8,0,0.017290666699409485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,fp8,0,0.02128533273935318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,2,2,128,1,float16,float16,0,0.0194560003777345
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,float16,0,0.015173333386580149
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.01740266631046931
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,float16,0,0.013861333330472311
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,2,1,128,1,float16,fp8,0,0.01509333277742068
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,float16,0,0.015274666249752045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,2,2,128,1,float16,fp8,0,0.017386666188637417
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,float16,0,0.013002666334311167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.01505600040157636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,float16,0,0.013221333424250284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,2,1,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,float16,0,0.015040000279744467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,2,2,128,1,float16,fp8,0,0.015061333775520325
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,float16,0,0.012650666137536367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,fp8,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,2,1,128,1,float16,float16,0,0.013034666577974955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,fp8,0,0.013242666920026144
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,2,2,128,1,float16,float16,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.013072000195582708
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,float16,0,0.01310933381319046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,2,1,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,2,2,128,1,float16,fp8,0,0.013173333058754602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,float16,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,2,1,128,1,float16,fp8,0,0.013274667163689932
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,float16,0,0.012981332838535309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,2,2,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,float16,0,0.029743999242782593
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,fp8,0,0.038949333131313324
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,64,2,1,128,1,float16,float16,0,0.03583466758330663
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.03513066718975703
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,float16,0,0.02319466571013133
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,2,1,128,1,float16,fp8,0,0.02552533398071925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,float16,0,0.029520000020662945
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,float16,0,0.01937599976857503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.021530665457248688
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,2,2,128,1,float16,fp8,0,0.035205334424972534
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,float16,0,0.01700266698996226
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,float16,0,0.01918399954835574
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,2,2,128,1,float16,fp8,0,0.023232000569502514
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,2,1,128,1,float16,fp8,0,0.017866666118303936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,float16,0,0.015274666249752045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.016789333273967106
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,float16,0,0.013056000073750814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,float16,0,0.015322666615247726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,2,1,128,1,float16,fp8,0,0.015295999745527903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,float16,0,0.01303999995191892
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.014959999670584997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,2,2,128,1,float16,fp8,0,0.016800000021855038
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,float16,0,0.01312000056107839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,2,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,fp8,0,0.013568000247081121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,2,2,128,1,float16,float16,0,0.01314666618903478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.013088000317414602
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,float16,0,0.013338666409254074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,float16,0,0.011152000476916632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,2,1,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,float16,0,0.012645332763592402
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,float16,0,0.01099733387430509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.014416000495354334
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,2,2,128,1,float16,fp8,0,0.013082666943470636
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,float16,0,0.010981333752473196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,2,1,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,float16,0,0.012826666235923767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,2,2,128,1,float16,fp8,0,0.013167999684810638
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,float16,0,0.01108266661564509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,float16,0,0.013162666310866674
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,2,1,128,1,float16,fp8,0,0.012938667088747025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,fp8,0,0.012981332838535309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,2,2,128,1,float16,float16,0,0.011215999722480774
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,float16,0,0.012757333616415659
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.01313599944114685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,float16,0,0.011183999478816986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,2,1,128,1,float16,fp8,0,0.011893333246310553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,float16,0,0.012837332983811697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,2,2,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,float16,0,0.023333333432674408
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.025589334468046825
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,fp8,0,0.029733332494894665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,32,2,1,128,1,float16,float16,0,0.029333333174387615
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,float16,0,0.019306667149066925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,2,1,128,1,float16,fp8,0,0.02015999952952067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,float16,0,0.015247999380032221
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,fp8,0,0.025248001019159954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,2,2,128,1,float16,float16,0,0.02334933231274287
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.017136000096797943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,float16,0,0.014842666685581207
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,2,1,128,1,float16,fp8,0,0.014991999914248785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,float16,0,0.015295999745527903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,2,2,128,1,float16,fp8,0,0.017488000293572743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,float16,0,0.013269333789745966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.01488000030318896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,2,1,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,float16,0,0.011055999745925268
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,float16,0,0.01331199953953425
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,2,2,128,1,float16,fp8,0,0.01403733342885971
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.012815999488035837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,float16,0,0.011029332876205444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,2,1,128,1,float16,fp8,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,float16,0,0.011226666470368704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,2,2,128,1,float16,fp8,0,0.01292266696691513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,float16,0,0.011285333583752314
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.012741333494583765
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,float16,0,0.010938666760921478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,2,1,128,1,float16,fp8,0,0.012682666381200155
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,float16,0,0.01119999960064888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,float16,0,0.01126933346192042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,2,2,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,float16,0,0.011237333218256632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,2,1,128,1,float16,fp8,0,0.011717333147923151
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,float16,0,0.012863999853531519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,2,2,128,1,float16,fp8,0,0.01267733300725619
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,float16,0,0.011109333485364914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.011354666203260422
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,float16,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,2,1,128,1,float16,fp8,0,0.012810666114091873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,float16,0,0.012608000387748083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,2,2,128,1,float16,fp8,0,0.011349332829316458
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.012773333738247553
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,float16,0,0.010960000256697336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,2,1,128,1,float16,fp8,0,0.011045332998037338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,float16,0,0.012453333785136541
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,2,2,128,1,float16,fp8,0,0.011120000233252844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,float16,0,0.019365333020687103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,float16,0,0.02554133286078771
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,16,2,1,128,1,float16,fp8,0,0.025413334369659424
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.019130667050679524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,float16,0,0.01709866647919019
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,2,1,128,1,float16,fp8,0,0.01743999992807706
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,float16,0,0.019226666539907455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,float16,0,0.014912000546852747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,2,2,128,1,float16,fp8,0,0.019088000059127808
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.015002666662136713
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,float16,0,0.013248000293970108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,fp8,0,0.015125333021084467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,2,2,128,1,float16,float16,0,0.013882666826248169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,float16,0,0.012826666235923767
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,2,1,128,1,float16,fp8,0,0.01351999988158544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.013045333325862885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,float16,0,0.012746666868527731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,2,1,128,1,float16,fp8,0,0.012858666479587555
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,float16,0,0.011050666371981302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,fp8,0,0.012719999998807907
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,2,2,128,1,float16,float16,0,0.012997332960367203
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,float16,0,0.010944000134865442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,2,1,128,1,float16,fp8,0,0.01257066677014033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,float16,0,0.012080000092585882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,2,2,128,1,float16,fp8,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,float16,0,0.010922666639089584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.012170666207869848
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,float16,0,0.010922666639089584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,2,1,128,1,float16,fp8,0,0.012122667084137598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,float16,0,0.011045332998037338
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,float16,0,0.011418666690587997
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,2,2,128,1,float16,fp8,0,0.012879999975363413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,float16,0,0.011114666859308878
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.012053333222866058
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,2,1,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,float16,0,0.01116266722480456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,2,2,128,1,float16,fp8,0,0.011109333485364914
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,float16,0,0.012869333227475485
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,float16,0,0.012223999947309494
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,2,1,128,1,float16,fp8,0,0.011328000575304031
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,float16,0,0.011066666493813196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,2,2,128,1,float16,fp8,0,0.012821332861979803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,float16,0,0.012469333906968435
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.011039999624093374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,float16,0,0.010874666273593903
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,2,1,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,float16,0,0.012970666090647379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,2,2,128,1,float16,fp8,0,0.010960000256697336
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.011183999478816986
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,0,0.012917333592971167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,0,0.010885333021481832
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,0,0.0107893335322539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,0,0.011002667248249054
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,0,0.012768000364303589
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,0,0.011312000453472137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.010735999792814255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,0,0.009173333023985228
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,0,0.011152000476916632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,0,0.01101333275437355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,0,0.00897066667675972
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.01062400018175443
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,0,0.010826667149861654
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,0,0.009216000015536943
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,0,0.0106133334338665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,0,0.01116266722480456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,0,0.008997333546479544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.008869333192706108
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,0,0.009632000078757605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,0,0.010944000134865442
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,0,0.008885333314538002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,0,0.00972800018886725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,0,0.009232000137368837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.010570666442314783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,0,0.009162666896979014
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,0,0.010741333166758219
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,0,0.008874666566650072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,0,0.009610666582981745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,0,0.010773333410422007
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.010698666175206503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,0,0.008997333546479544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,0,0.00891733355820179
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,0,0.010122666756312052
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,0,0.01110400011142095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,0,0.009279999881982803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.009573333586255709
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,0,0.00966933307548364
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,0,0.010597333312034607
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,0,0.009205333267649015
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,0,0.009882666791478792
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,0,0.009583999713261923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.01073066641887029
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,0,0.008890666688481966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,0,0.01081066702802976
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,0,0.010698666175206503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,0,0.009119999905427298
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,float16,0,0.3782453139623006
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,0.3370346625645955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,float16,0,0.3755999803543091
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16384,1,1,128,1,float16,fp8,0,0.33743464946746826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,float16,0,0.23308799664179483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,0.21704532702763876
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,float16,0,0.2322559952735901
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16384,1,1,128,1,float16,fp8,0,0.21940267086029053
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,float16,0,0.20995734135309854
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.20171199242273966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,float16,0,0.2118133306503296
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16384,1,1,128,1,float16,fp8,0,0.1996906598409017
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,float16,0,0.2344640096028646
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,0.22623467445373535
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,float16,0,0.23417067527770996
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,12288,1,1,128,1,float16,fp8,0,0.22668800751368204
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,float16,0,0.17415465911229452
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,0.16671466827392578
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,float16,0,0.17356799046198526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,12288,1,1,128,1,float16,fp8,0,0.16916267077128092
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,float16,0,0.15983999768892923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.1565600037574768
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,float16,0,0.16171733538309732
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,12288,1,1,128,1,float16,fp8,0,0.154341330130895
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,float16,0,0.17748266458511353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,0.17339199781417847
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,float16,0,0.17765865723292032
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,float16,0,0.14739200472831726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.14197333653767905
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,10240,1,1,128,1,float16,fp8,0,0.17238932847976685
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,float16,0,0.14703999956448874
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,float16,0,0.13615467151006064
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,10240,1,1,128,1,float16,fp8,0,0.1406880021095276
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.13365333278973898
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,float16,0,0.13607999682426453
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,10240,1,1,128,1,float16,fp8,0,0.13114666938781738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,float16,0,0.23092800378799438
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,0.21261332432428995
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,float16,0,0.2285333275794983
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,8192,1,1,128,1,float16,fp8,0,0.2126026749610901
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,float16,0,0.1359946628411611
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.13406399885813394
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,float16,0,0.13590400417645773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,float16,0,0.11935999989509583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.11761066317558289
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,float16,0,0.11755733688672383
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,8192,1,1,128,1,float16,fp8,0,0.1322826643784841
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,8192,1,1,128,1,float16,fp8,0,0.11738666892051697
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,float16,0,0.1113920013109843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.10760533809661865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,float16,0,0.1116480032602946
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,8192,1,1,128,1,float16,fp8,0,0.10905067125956218
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,float16,0,0.1437333325544993
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,0.14461867014567056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,float16,0,0.14223999778429666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,6144,1,1,128,1,float16,fp8,0,0.1450506647427877
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,float16,0,0.10223467151323955
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.10218666990598042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,float16,0,0.10161599516868591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,6144,1,1,128,1,float16,fp8,0,0.10341333349545796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,float16,0,0.09112000465393066
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.09087466200192769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,fp8,0,0.09101333220799764
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,6144,1,1,128,1,float16,float16,0,0.09075733025868733
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,float16,0,0.08654933174451192
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.08452799916267395
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,float16,0,0.08705600102742513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,6144,1,1,128,1,float16,fp8,0,0.08450133601824443
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,float16,0,0.1537813345591227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.14031466841697693
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,float16,0,0.1548746625582377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,4096,1,1,128,1,float16,fp8,0,0.14375999569892883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,float16,0,0.08593599994977315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.08776000142097473
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,float16,0,0.08653333783149719
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,4096,1,1,128,1,float16,fp8,0,0.08813333511352539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,float16,0,0.07170133292675018
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.07257600128650665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,float16,0,0.06423999865849812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,fp8,0,0.07348800202210744
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,4096,1,1,128,1,float16,float16,0,0.07227733234564464
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.0642133355140686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,float16,0,0.06449066599210103
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,float16,0,0.06224533418814341
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,4096,1,1,128,1,float16,fp8,0,0.0658240020275116
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.06043200194835663
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,float16,0,0.062037333846092224
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,4096,1,1,128,1,float16,fp8,0,0.06228800117969513
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,float16,0,0.09849066535631816
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.10313600301742554
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,fp8,0,0.103685329357783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,3072,1,1,128,1,float16,float16,0,0.09682666261990865
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,float16,0,0.06648000081380208
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.06821333368619283
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,float16,0,0.06725866595904033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,3072,1,1,128,1,float16,fp8,0,0.07045866549015045
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,float16,0,0.056559999783833824
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.05885866781075796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,fp8,0,0.05821333328882853
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,3072,1,1,128,1,float16,float16,0,0.05789866546789805
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,float16,0,0.052000001072883606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.05219733218352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,float16,0,0.052255998055140175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,3072,1,1,128,1,float16,fp8,0,0.05190399785836538
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,float16,0,0.04985600213209788
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.05000533163547516
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,float16,0,0.050106664498647056
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,3072,1,1,128,1,float16,fp8,0,0.04885333279768626
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,float16,0,0.11327999830245972
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.109525332848231
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,float16,0,0.11403733491897583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,2048,1,1,128,1,float16,fp8,0,0.1053600013256073
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,float16,0,0.06213866670926412
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.06607466439406078
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,float16,0,0.06176533301671346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,2048,1,1,128,1,float16,fp8,0,0.0661653329928716
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,float16,0,0.04791999856630961
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.052000001072883606
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,float16,0,0.04805333415667216
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,2048,1,1,128,1,float16,fp8,0,0.051551997661590576
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,float16,0,0.04188799858093262
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.043706665436426796
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,float16,0,0.041696002086003624
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,2048,1,1,128,1,float16,fp8,0,0.044122666120529175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,float16,0,0.03975466638803482
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.03788800040880839
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,fp8,0,0.039690665900707245
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,2048,1,1,128,1,float16,float16,0,0.038245332737763725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,float16,0,0.037317333122094475
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.03754133234421412
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,float16,0,0.03762666632731756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,2048,1,1,128,1,float16,fp8,0,0.03760000069936117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,float16,0,0.07358933488527934
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.08078399797280629
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,float16,0,0.07236800094445546
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,float16,0,0.04942933221658071
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.05398400127887726
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,float16,0,0.04930666585763296
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1536,1,1,128,1,float16,fp8,0,0.05347733199596405
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,float16,0,0.040021332601706185
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.04168533285458883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1536,1,1,128,1,float16,fp8,0,0.08291733264923096
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,float16,0,0.033520000676314034
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,fp8,0,0.04244266450405121
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1536,1,1,128,1,float16,float16,0,0.03965866565704346
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.03562666724125544
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,float16,0,0.03164266546567281
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,float16,0,0.03372266640265783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1536,1,1,128,1,float16,fp8,0,0.03571200122435888
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.032501332461833954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,float16,0,0.0315786674618721
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,float16,0,0.03130666663249334
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1536,1,1,128,1,float16,fp8,0,0.03155199935038885
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,float16,0,0.031221332649389904
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.03129599988460541
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1536,1,1,128,1,float16,fp8,0,0.031328000128269196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,float16,0,0.09412800272305806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.0915786623954773
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,fp8,0,0.08701866865158081
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1024,1,1,128,1,float16,float16,0,0.09470933675765991
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,float16,0,0.049178664882977806
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.0554720014333725
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,float16,0,0.0495306650797526
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,float16,0,0.03590933233499527
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.041365332901477814
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1024,1,1,128,1,float16,fp8,0,0.05395199855168661
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,float16,0,0.036746665835380554
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1024,1,1,128,1,float16,fp8,0,0.03972800076007843
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,float16,0,0.03267733256022135
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,float16,0,0.031354665756225586
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.031845333675543465
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1024,1,1,128,1,float16,fp8,0,0.03197866678237915
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,float16,0,0.027322667340437572
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.027450665831565857
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,float16,0,0.027488000690937042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1024,1,1,128,1,float16,fp8,0,0.02735999971628189
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,float16,0,0.025434667865435284
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.025429333249727886
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,float16,0,0.025583999852339428
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1024,1,1,128,1,float16,fp8,0,0.02535466601451238
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,float16,0,0.025194667279720306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.023552000522613525
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,float16,0,0.023381332556406658
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1024,1,1,128,1,float16,fp8,0,0.0262719988822937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,float16,0,0.08470933636029561
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.0811466674009959
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,fp8,0,0.07878399888674419
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,512,1,1,128,1,float16,float16,0,0.08476266264915466
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,float16,0,0.043552001317342125
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.04942933221658071
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,float16,0,0.04348800083001455
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,512,1,1,128,1,float16,fp8,0,0.05003733436266581
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,float16,0,0.03136533250411352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.03541333228349686
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,float16,0,0.03148266673088074
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,float16,0,0.025199999411900837
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,512,1,1,128,1,float16,fp8,0,0.03527999917666117
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.027317332724730175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,float16,0,0.025120000044504803
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,float16,0,0.02143999934196472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,512,1,1,128,1,float16,fp8,0,0.027215999861558277
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.021562665700912476
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,float16,0,0.02146133283774058
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,512,1,1,128,1,float16,fp8,0,0.02128000060717265
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,float16,0,0.019354666272799175
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.01932266727089882
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,float16,0,0.01931200052301089
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,512,1,1,128,1,float16,fp8,0,0.02102400114138921
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,float16,0,0.018863999595244724
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.01922133316596349
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,float16,0,0.019333332777023315
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,512,1,1,128,1,float16,fp8,0,0.018954666952292126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,float16,0,0.01716800034046173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.018800000349680584
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,float16,0,0.01918399954835574
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,512,1,1,128,1,float16,fp8,0,0.017418666432301205
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,float16,0,0.04015466570854187
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,float16,0,0.04016000032424927
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.047093331813812256
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,float16,0,0.027701333165168762
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.03242666771014532
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,256,1,1,128,1,float16,fp8,0,0.04682666560014089
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,float16,0,0.021349333226680756
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,float16,0,0.02739199995994568
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,256,1,1,128,1,float16,fp8,0,0.03310399999221166
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,float16,0,0.02128533273935318
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.02372266600529353
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,float16,0,0.0189280000825723
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,256,1,1,128,1,float16,fp8,0,0.02329600105683009
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.01926400015751521
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,fp8,0,0.01933866615096728
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,float16,0,0.01718933383623759
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,256,1,1,128,1,float16,float16,0,0.018207999567190807
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.01739199956258138
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,float16,0,0.01710933322707812
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,256,1,1,128,1,float16,fp8,0,0.01717866708834966
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.015194666882356008
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,float16,0,0.015365333606799444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,float16,0,0.0164533331990242
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,256,1,1,128,1,float16,fp8,0,0.014901333798964819
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,float16,0,0.01524266724785169
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.015263999501864115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,float16,0,0.015311999867359797
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,256,1,1,128,1,float16,fp8,0,0.014965333044528961
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,float16,0,0.015114666273196539
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,float16,0,0.015333333363135656
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.015263999501864115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,256,1,1,128,1,float16,fp8,0,0.01488000030318896
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,float16,0,0.02537599951028824
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.031018666923046112
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,fp8,0,0.02940800040960312
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,128,1,1,128,1,float16,float16,0,0.025519999365011852
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,float16,0,0.019258666783571243
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.02143999934196472
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,float16,0,0.01915733392039935
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,float16,0,0.015098666151364645
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,128,1,1,128,1,float16,fp8,0,0.021509334444999695
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.01704000060757001
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,float16,0,0.014991999914248785
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,float16,0,0.014335999886194864
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,128,1,1,128,1,float16,fp8,0,0.01693333312869072
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.014949332922697067
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,float16,0,0.014352000008026758
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,float16,0,0.012928000340859095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,128,1,1,128,1,float16,fp8,0,0.015157333264748255
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.013290667285521826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,float16,0,0.013151999562978745
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,128,1,1,128,1,float16,fp8,0,0.01314666618903478
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.01310933381319046
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,float16,0,0.013002666334311167
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,float16,0,0.012874666601419449
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,128,1,1,128,1,float16,fp8,0,0.012986666212479273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.013141332815090815
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,float16,0,0.012853333105643591
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,128,1,1,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,float16,0,0.012944000462690989
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.013376000026861826
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,float16,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,128,1,1,128,1,float16,fp8,0,0.01302933320403099
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,float16,0,0.019130667050679524
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,float16,0,0.019274666905403137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.023082666099071503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,64,1,1,128,1,float16,fp8,0,0.02256533255179723
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,float16,0,0.015392000476519266
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.017071999609470367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,float16,0,0.015103999525308609
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,64,1,1,128,1,float16,fp8,0,0.01720533271630605
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,float16,0,0.013023999830087027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.014970666418472925
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,float16,0,0.013237333546082178
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,64,1,1,128,1,float16,fp8,0,0.014853333433469137
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,float16,0,0.013077333569526672
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.013061333447694778
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,float16,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,64,1,1,128,1,float16,fp8,0,0.012949333836634954
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,float16,0,0.011338666081428528
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.012960000584522883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,float16,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,64,1,1,128,1,float16,fp8,0,0.01239466667175293
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,float16,0,0.012800000607967377
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.013104000439246496
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,float16,0,0.012981332838535309
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,64,1,1,128,1,float16,fp8,0,0.012991999586423239
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,float16,0,0.011776000261306763
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.012106666962305704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,float16,0,0.012666666259368261
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,64,1,1,128,1,float16,fp8,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,float16,0,0.012938667088747025
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.012693333129088083
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,float16,0,0.012794667234023413
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,64,1,1,128,1,float16,fp8,0,0.013023999830087027
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,float16,0,0.017184000462293625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.017093333105246227
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,float16,0,0.015178666760524115
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,float16,0,0.014671999961137772
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.013445333888133367
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,32,1,1,128,1,float16,fp8,0,0.017050666113694508
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,float16,0,0.014325333138306936
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,32,1,1,128,1,float16,fp8,0,0.013914667069911957
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,float16,0,0.013199999928474426
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.012784000486135483
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,float16,0,0.012240000069141388
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,32,1,1,128,1,float16,fp8,0,0.0129120002190272
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,float16,0,0.011365332951148352
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,float16,0,0.011285333583752314
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.012805332740147909
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,32,1,1,128,1,float16,fp8,0,0.013210666676362356
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,float16,0,0.011301333705584208
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.011152000476916632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,float16,0,0.011333333949247995
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,float16,0,0.011413333316644033
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,32,1,1,128,1,float16,fp8,0,0.011578666667143503
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.01121066634853681
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,float16,0,0.011194666226704916
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,32,1,1,128,1,float16,fp8,0,0.011461333682139715
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,float16,0,0.01126933346192042
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,float16,0,0.011146667102972666
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.011136000355084738
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,32,1,1,128,1,float16,fp8,0,0.012847999731699625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.011306667079528173
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,float16,0,0.011120000233252844
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,float16,0,0.012106666962305704
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,32,1,1,128,1,float16,fp8,0,0.012778667112191519
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,float16,0,0.014912000546852747
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.015040000279744467
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,fp8,0,0.01522133375207583
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,16,1,1,128,1,float16,float16,0,0.014975999792416891
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,float16,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.012890666723251343
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,float16,0,0.01301866645614306
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,float16,0,0.011930666863918304
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.012954667210578918
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,float16,0,0.012741333494583765
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,16,1,1,128,1,float16,fp8,0,0.013264000415802002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,float16,0,0.011237333218256632
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,16,1,1,128,1,float16,fp8,0,0.012896000097195307
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.012965332716703415
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,float16,0,0.01118933285276095
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,float16,0,0.012138667205969492
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,16,1,1,128,1,float16,fp8,0,0.010981333752473196
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.012901333471139273
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,float16,0,0.01101333275437355
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,16,1,1,128,1,float16,fp8,0,0.013066666821638743
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,float16,0,0.011029332876205444
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.012762666990359625
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,float16,0,0.011039999624093374
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,float16,0,0.010784000158309937
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,16,1,1,128,1,float16,fp8,0,0.012885333349307379
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.011130666981140772
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,float16,0,0.011130666981140772
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,float16,0,0.012960000584522883
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,16,1,1,128,1,float16,fp8,0,0.01097600037852923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.01108266661564509
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,float16,0,0.01102399950226148
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,16,1,1,128,1,float16,fp8,0,0.012789333860079447
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,0,0.010842667271693548
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.010837333897749582
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,0,0.011264000087976456
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,0,0.011050666371981302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,0,0.010992000500361124
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.01062400018175443
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,0,0.010965333630641302
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,0,0.010570666442314783
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,0,0.010191999996701876
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.010608000059922537
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,0,0.011242666592200598
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,0,0.009141333401203156
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.010538666198650995
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,0,0.01080000028014183
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,0,0.009152000149091085
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,0,0.010298666854699453
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,0,0.011077333241701126
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.00961599995692571
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,0,0.009663999701539675
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,0,0.0106133334338665
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,0,0.01090666651725769
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.009919999788204828
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,0,0.009375999992092451
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,0,0.009226666763424873
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,0,0.009359999870260557
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.008842666943868002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,0,0.009583999713261923
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,0,0.009984000275532404
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,0,0.008885333314538002
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.010773333410422007
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,0,0.009248000259200731
VLLM,0.14.0,NVIDIA GB200,context_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,0,0.009359999870260557
