framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,1,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,7,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,7,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,15,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,15,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,31,0.010656000425418219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,31,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,3,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,3,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,63,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,63,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,127,0.010538666198650995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,127,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,255,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,255,0.010597333312034607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,511,0.011637333780527115
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,511,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,1023,0.012944000462690989
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,1023,0.013151999562978745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,2047,0.029722665747006733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,2047,0.01704000060757001
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,4095,0.03133333226044973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,float16,4095,0.04821333289146423
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,1,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,3,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,3,0.01119999960064888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,7,0.011258666714032492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,7,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,15,0.012373333175977072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,15,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,31,0.011349332829316458
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,31,0.01116266722480456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,63,0.011109333485364914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,63,0.01257066677014033
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,127,0.011141333729028702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,1,0.011253333340088526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,127,0.011226666470368704
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,255,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,255,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,511,0.013088000317414602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,511,0.013045333325862885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,1023,0.03031466652949651
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,1023,0.015520000209410986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,2047,0.04818133513132731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,2047,0.031248000760873158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,float16,4095,0.08450667063395183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,1,0.009029333169261614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,8,128,1,float16,fp8,4095,0.049813335140546165
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,3,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,1,0.010549332946538925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,3,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,7,0.008757333581646284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,7,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,15,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,15,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,31,0.010453333457310995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,31,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,63,0.008746666833758354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,63,0.010682666053374609
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,127,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,127,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,255,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,255,0.008965333302815756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,511,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,511,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,1023,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,1023,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,2047,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,2047,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,4095,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,4095,0.011141333729028702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,8191,0.015919999529918034
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,8191,0.015376000354687372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,float16,16383,0.01922133316596349
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,4,128,1,float16,fp8,16383,0.018965333700180054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,1,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,1,0.010410666465759277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,3,0.00955200009047985
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,3,0.008762666955590248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,7,0.009002666920423508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,15,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,15,0.010485333700974783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,31,0.008767999708652496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,31,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,63,0.009002666920423508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,63,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,64,4,128,1,float16,fp8,1,0.012128000458081564
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,127,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,127,0.008810666700204214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,255,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,511,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,511,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,1023,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,1023,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,2047,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,2047,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,4095,0.012863999853531519
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,4095,0.01320533330241839
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,7,0.009957333405812582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,8191,0.014917333920796713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,8191,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,float16,16383,0.017279999951521557
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,1,0.009850666547815004
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,1,0.010522666076819101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,3,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,255,0.010543999572594961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,7,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,7,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,15,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,15,0.010501333822806677
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,31,0.010533332824707031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,31,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,63,0.009583999713261923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,64,8,128,1,float16,fp8,16383,0.01695999999841054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,63,0.011354666203260422
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,127,0.009930666536092758
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,127,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,255,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,511,0.010751999914646149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,511,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,1023,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,1023,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,2047,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,3,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,2047,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,4095,0.013173333058754602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,4095,0.015040000279744467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,8191,0.01685333376129468
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,8191,0.015119999647140503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,16383,0.019098666807015736
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,fp8,16383,0.019039999693632126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,1,0.010399999717871347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,1,0.008997333546479544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,3,0.01027199998497963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,3,0.008896000062425932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,7,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,7,0.01055466632048289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,15,0.008826666822036108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,15,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,31,0.010586666564146677
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,31,0.009514666472872099
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,63,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,63,0.010650667051474253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,127,0.008805333326260248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,127,0.010490667074918747
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,255,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,255,0.008789333204428354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,4,128,1,float16,float16,255,0.009503999724984169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,511,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,511,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,1023,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,1023,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,2047,0.012847999731699625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,4095,0.014991999914248785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,4095,0.012965332716703415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,8191,0.01659199967980385
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,8191,0.014938666174809137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,float16,16383,0.03136533250411352
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,16383,0.01703466723362605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,1,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,1,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,3,0.012725333372751871
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,3,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,7,0.012154666086037954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,7,0.012863999853531519
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,15,0.011887999872366587
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,15,0.011258666714032492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,31,0.012890666723251343
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,31,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,64,8,128,1,float16,fp8,2047,0.012789333860079447
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,63,0.011648000528415045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,63,0.012714666624863943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,127,0.01192533348997434
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,127,0.012800000607967377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,255,0.012869333227475485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,255,0.012730666746695837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,511,0.014853333433469137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,511,0.01488000030318896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,1023,0.03137599925200144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,1023,0.01714666684468587
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,float16,2047,0.049626668294270836
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,4,128,1,float16,fp8,2047,0.03288000077009201
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,3,0.01331199953953425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,1,0.013178666432698568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,3,0.013232000172138214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,7,0.012896000097195307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,7,0.013882666826248169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,15,0.0145066666106383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,15,0.01293333371480306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,31,0.013663999736309052
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,31,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,63,0.012954667210578918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,63,0.012874666601419449
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,127,0.014773332824309668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,127,0.012831999609867731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,1,0.013093333691358566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,255,0.013557333499193192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,255,0.013066666821638743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,511,0.030165334542592365
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,1023,0.04958933095137278
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,1023,0.03142400085926056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,511,0.01708799973130226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,1,0.0099093330403169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,1,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,fp8,2047,0.05051200091838837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,64,8,128,1,float16,float16,2047,0.0855466624101003
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,3,0.009578666960199675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,3,0.01071999967098236
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,7,0.008778666456540426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,15,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,15,0.009482666850090027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,31,0.009248000259200731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,31,0.009989333028594652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,63,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,127,0.009349333122372627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,127,0.010527999450763067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,255,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,255,0.010677333921194077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,511,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,511,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,1023,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,1023,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,2047,0.013125333935022354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,2047,0.014117332796255747
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,7,0.009482666850090027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,4095,0.015216000378131866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,4095,0.014943999548753103
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,8191,0.01720533271630605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,8191,0.0170666662355264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,16383,0.01911466692884763
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,float16,16383,0.03294399877389272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,1,0.009530666594703993
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,4,128,1,float16,fp8,63,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,1,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,3,0.00901333304742972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,7,0.009658666948477427
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,7,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,15,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,15,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,31,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,31,0.008938666433095932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,63,0.009621333330869675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,127,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,127,0.009818666925032934
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,255,0.010128000130256018
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,255,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,511,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,511,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,1023,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,1023,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,2047,0.012938667088747025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,2047,0.012709333250919977
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,4095,0.012938667088747025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,4095,0.014773332824309668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,3,0.00984533317387104
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,8191,0.02991466720898946
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,16383,0.031285333136717476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,16383,0.048170665899912514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,float16,63,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,1,0.014837333311637243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,1,0.014853333433469137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,3,0.014991999914248785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,3,0.014837333311637243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,7,0.01516266663869222
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,7,0.014848000059525171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,64,8,128,1,float16,fp8,8191,0.01580799991885821
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,15,0.014858666807413101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,15,0.015040000279744467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,31,0.015194666882356008
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,31,0.014949332922697067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,63,0.014981333166360855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,63,0.01524266724785169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,127,0.015066667149464289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,127,0.014981333166360855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,255,0.014885333677132925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,255,0.014938666174809137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,511,0.031712000568707786
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,511,0.018986667195955913
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,float16,1023,0.04979733129342397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,4,128,1,float16,fp8,1023,0.03387733300526937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,1,0.017024000485738117
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,1,0.01907733331123988
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,3,0.01884799947341283
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,7,0.017071999609470367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,7,0.017551999539136887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,3,0.01701333373785019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,15,0.01886933296918869
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,31,0.018960000326236088
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,31,0.0173333336909612
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,63,0.016949333250522614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,63,0.01801066721479098
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,127,0.018757333358128864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,127,0.01691199963291486
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,255,0.029711998999118805
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,255,0.017231999586025875
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,15,0.017008000363906223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,511,0.04896000027656555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,511,0.031370667119820915
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,float16,1023,0.08452799916267395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,64,8,128,1,float16,fp8,1023,0.049957334995269775
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,1,0.021173333128293354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,1,0.018901333212852478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,3,0.021205333371957142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,3,0.019280000279347103
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,7,0.021002667645613354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,7,0.019189332922299702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,15,0.01911466692884763
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,31,0.021344001094500225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,31,0.01894933357834816
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,15,0.021541332205136616
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,63,0.020997333029905956
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,63,0.01913600042462349
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,127,0.021477334201335907
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,127,0.019050666441520054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,float16,255,0.031354665756225586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,1,0.02517866591612498
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,4,128,1,float16,fp8,255,0.018954666952292126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,3,0.02717866748571396
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,3,0.025018667181332905
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,7,0.02714666724205017
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,1,0.02735999971628189
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,7,0.02513599892457326
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,15,0.027461332579453785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,31,0.02738133321205775
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,31,0.02514133354028066
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,63,0.02771199991305669
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,15,0.025173333783944447
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,63,0.025045332809289295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,127,0.03330666571855545
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,127,0.025055999557177227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,fp8,255,0.03339733431736628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,64,8,128,1,float16,float16,255,0.050101334849993386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,float16,1,0.033999999364217125
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,fp8,1,0.029296000798543293
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,fp8,3,0.02941333254178365
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,float16,3,0.035455999275048576
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,float16,7,0.03551999976237615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,fp8,7,0.029301332930723827
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,float16,15,0.034927998979886375
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,fp8,15,0.02940266579389572
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,float16,31,0.035429333647092186
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,fp8,31,0.029317334294319153
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,float16,63,0.03504000107447306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,fp8,63,0.03014400104681651
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,float16,127,0.03894400099913279
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,4,128,1,float16,fp8,127,0.029445332785447437
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,float16,1,0.047695999344189964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,fp8,1,0.04178666571776072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,float16,3,0.048767998814582825
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,fp8,3,0.04132800052563349
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,float16,7,0.048351998130480446
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,fp8,7,0.04144533226887385
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,float16,15,0.048432002464930214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,float16,31,0.047695999344189964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,fp8,31,0.04173333446184794
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,fp8,15,0.0415040006240209
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,float16,63,0.04914666712284088
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,1,0.008767999708652496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,1,0.009509333098928133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,3,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,fp8,63,0.04141866664091746
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,3,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,7,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,7,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,float16,127,0.05686933298905691
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,15,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,15,0.009775999933481216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,31,0.008709333216150602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,63,0.009759999811649323
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,31,0.010591999938090643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,63,0.009466666728258133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,127,0.010581333190202713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,255,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,255,0.01071999967098236
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,511,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,64,8,128,1,float16,fp8,127,0.04538666705290476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,511,0.011120000233252844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,1023,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,2047,0.013045333325862885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,2047,0.012719999998807907
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,1023,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,4095,0.015354666858911514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,4095,0.014991999914248785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,8191,0.030271999537944794
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,8191,0.018570666511853535
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,16383,0.04816000163555145
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,fp8,16383,0.031770666440327965
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,1,0.010501333822806677
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,1,0.008826666822036108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,3,0.010213333492477735
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,3,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,7,0.008816000074148178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,7,0.010602666685978571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,15,0.00966933307548364
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,15,0.00972800018886725
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,4,128,1,float16,float16,127,0.010112000008424124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,31,0.01044800008336703
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,31,0.010527999450763067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,63,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,63,0.010575999816258749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,127,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,127,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,255,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,255,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,511,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,511,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,1023,0.012618667135636011
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,1023,0.012821332861979803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,2047,0.013023999830087027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,2047,0.013082666943470636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,4095,0.0303413321574529
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,4095,0.015247999380032221
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,8191,0.04778666794300079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,8191,0.03262399882078171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,float16,16383,0.08337066570917766
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,64,8,128,1,float16,fp8,16383,0.04953599969546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,fp8,1,0.05106666684150696
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,float16,1,0.06292266647020976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,float16,3,0.0621919979651769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,fp8,3,0.05043200155099233
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,float16,7,0.06247999767462412
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,fp8,7,0.05100266635417938
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,float16,15,0.062218666076660156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,fp8,15,0.05065066615740458
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,float16,31,0.06206400195757548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,fp8,31,0.05186666548252106
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,float16,63,0.062234664956728615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,4,128,1,float16,fp8,63,0.051882664362589516
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,float16,1,0.08535466591517131
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,fp8,1,0.0755626658598582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,float16,3,0.08602666854858398
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,fp8,3,0.07564266522725423
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,float16,7,0.0848640004793803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,fp8,7,0.07638399799664815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,float16,15,0.08568533261617024
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,fp8,15,0.0765066643555959
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,float16,31,0.08550399541854858
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,fp8,31,0.07653866708278656
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,float16,63,0.08505066235860188
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,64,8,128,1,float16,fp8,63,0.0748533308506012
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,4,128,1,float16,float16,1,0.11353600025177002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,4,128,1,float16,float16,3,0.11401066184043884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,4,128,1,float16,fp8,3,0.09295466542243958
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,4,128,1,float16,float16,7,0.11361066500345866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,4,128,1,float16,fp8,7,0.09434133768081665
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,4,128,1,float16,float16,15,0.11346133550008138
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,4,128,1,float16,fp8,15,0.09294933080673218
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,4,128,1,float16,fp8,1,0.09492266178131104
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,4,128,1,float16,float16,31,0.11451733112335205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,4,128,1,float16,fp8,31,0.0930560032526652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,8,128,1,float16,float16,1,0.15924266974131265
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,8,128,1,float16,fp8,1,0.14015466968218485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,8,128,1,float16,float16,3,0.1602826714515686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,8,128,1,float16,fp8,3,0.139082670211792
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,8,128,1,float16,float16,7,0.15877866744995117
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,8,128,1,float16,fp8,7,0.14053866267204285
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,8,128,1,float16,float16,15,0.16024000446001688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,8,128,1,float16,fp8,15,0.14044266939163208
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,1,0.009029333169261614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,3,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,1,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,3,0.010559999694426855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,7,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,7,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,15,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,15,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,31,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,31,0.010512000570694605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,63,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,63,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,127,0.009466666728258133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,127,0.010597333312034607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,255,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,255,0.010543999572594961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,511,0.012773333738247553
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,511,0.01118933285276095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,1023,0.01292266696691513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,1023,0.012944000462690989
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,2047,0.01515199989080429
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,2047,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,4095,0.031583999594052635
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,4095,0.017338667064905167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,float16,8191,0.04849599798520406
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,4,128,1,float16,fp8,8191,0.03272533416748047
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,8,128,1,float16,float16,31,0.1593546668688456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,1,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,1,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,3,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,7,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,3,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,7,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,15,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,15,0.010543999572594961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,31,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,31,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,63,0.009712000067035357
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,63,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,127,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,127,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,255,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,255,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,511,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,511,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,1023,0.01302933320403099
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,1023,0.012901333471139273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,2047,0.029893333713213604
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,2047,0.015109332899252573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,4095,0.04762133459250132
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,float16,8191,0.08337600032488506
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,8191,0.04889066517353058
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,1,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,3,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,1,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,3,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,7,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,7,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,64,8,128,1,float16,fp8,4095,0.029696000119050343
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,15,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,15,0.011461333682139715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,31,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,31,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,63,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,63,0.011338666081428528
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,127,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,127,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,255,0.010666667173306147
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,255,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,511,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,511,0.01116266722480456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,1023,0.013253333667914072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,1023,0.013359999905029932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,2047,0.029520000020662945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,2047,0.017018667111794155
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,float16,4095,0.048165331284205117
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,4,128,1,float16,fp8,4095,0.03160533308982849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,1,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,1,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,3,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,3,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,7,0.01126933346192042
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,7,0.011247999966144562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,15,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,15,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,31,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,31,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,63,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,63,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,127,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,127,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,255,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,255,0.01126933346192042
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,511,0.013056000073750814
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,511,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,1023,0.029626667499542236
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,1023,0.014975999792416891
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,2047,0.031146667897701263
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,2047,0.048725331823031105
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,1,0.00938666673998038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,float16,4095,0.08434133728345235
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,48,8,128,1,float16,fp8,4095,0.049471999208132424
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,1,0.01022933361430963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,3,0.009397333487868309
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,3,0.010048000141978264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,7,0.009578666960199675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,7,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,15,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,15,0.010389333590865135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,31,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,31,0.009557333464423815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,63,0.00955200009047985
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,63,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,127,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,127,0.010597333312034607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,255,0.009594666461149851
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,255,0.01022933361430963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,511,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,1023,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,1023,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,2047,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,2047,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,4095,0.01119999960064888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,8191,0.01522133375207583
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,8191,0.01544533297419548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,16383,0.01757866640885671
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,16383,0.0173333336909612
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,1,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,64,8,128,1,float16,fp8,31,0.1405119995276133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,float16,511,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,3,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,7,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,4,128,1,float16,fp8,4095,0.011109333485364914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,7,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,15,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,15,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,31,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,31,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,63,0.009525333220760027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,63,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,127,0.008965333302815756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,127,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,1,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,255,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,255,0.00949866697192192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,511,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,511,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,1023,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,1023,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,2047,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,2047,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,3,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,4095,0.012954667210578918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,4095,0.012965332716703415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,8191,0.015077333897352219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,8191,0.015370666980743408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,float16,16383,0.017349333812793095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,48,8,128,1,float16,fp8,16383,0.016997333616018295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,1,0.009450666606426239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,3,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,3,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,7,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,15,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,7,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,15,0.00980266680320104
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,31,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,31,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,63,0.010138666878143946
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,127,0.010015999898314476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,127,0.011349332829316458
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,255,0.011226666470368704
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,511,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,511,0.011381333072980246
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,1023,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,1,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,1023,0.011674666156371435
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,2047,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,4095,0.013487999637921652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,2047,0.011488000551859537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,4095,0.014831999937693277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,8191,0.01711999997496605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,8191,0.01533866673707962
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,63,0.012661332885424295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,16383,0.018837332725524902
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,fp8,16383,0.01743999992807706
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,1,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,4,128,1,float16,float16,255,0.009594666461149851
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,3,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,3,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,7,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,15,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,15,0.010064000263810158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,31,0.010362666721145311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,31,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,63,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,63,0.009338666374484697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,127,0.010666667173306147
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,127,0.009589333087205887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,255,0.0103946669648091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,511,0.011120000233252844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,511,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,1023,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,1,0.010277333358923594
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,1023,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,2047,0.013264000415802002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,7,0.00903466654320558
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,2047,0.01309866706530253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,4095,0.013295999417702356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,4095,0.014021333307027817
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,8191,0.015130666395028433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,8191,0.01504533365368843
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,16383,0.0322026660044988
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,1,0.0129120002190272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,1,0.012944000462690989
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,float16,255,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,3,0.012944000462690989
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,7,0.01292266696691513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,7,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,15,0.012960000584522883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,15,0.011183999478816986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,31,0.012874666601419449
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,48,8,128,1,float16,fp8,16383,0.017055999487638474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,31,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,63,0.013056000073750814
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,63,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,127,0.012282667060693106
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,127,0.012671999633312225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,255,0.01250133290886879
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,255,0.01180800050497055
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,511,0.015119999647140503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,511,0.01333333303531011
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,1023,0.03030933439731598
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,1023,0.017210666090250015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,float16,2047,0.0489333321650823
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,2047,0.03225066761175791
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,1,0.014725333700577417
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,1,0.012800000607967377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,3,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,3,0.013093333691358566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,7,0.01310933381319046
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,15,0.01332266628742218
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,15,0.012879999975363413
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,31,0.01360000049074491
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,31,0.013264000415802002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,63,0.013536000003417334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,7,0.012901333471139273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,63,0.013290667285521826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,127,0.013253333667914072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,127,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,255,0.013925333817799887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,255,0.013786666095256805
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,511,0.029882666965325672
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,511,0.016997333616018295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,4,128,1,float16,fp8,3,0.012986666212479273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,1023,0.049728001157442726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,1023,0.03161066770553589
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,float16,2047,0.08609599868456523
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,1,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,3,0.009296000003814697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,48,8,128,1,float16,fp8,2047,0.05110933383305868
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,3,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,7,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,7,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,15,0.00960533320903778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,15,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,31,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,63,0.009466666728258133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,31,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,63,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,127,0.009546666716535887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,255,0.010117333382368088
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,255,0.009296000003814697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,511,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,511,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,1023,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,1023,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,2047,0.012869333227475485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,2047,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,4095,0.015226667126019796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,4095,0.015509333461523056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,8191,0.01728533332546552
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,8191,0.017157333592573803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,16383,0.03325333446264267
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,16383,0.019968000551064808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,1,0.009232000137368837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,3,0.009290666629870733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,3,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,7,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,7,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,15,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,15,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,31,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,31,0.00927466650803884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,float16,1,0.009455999980370203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,1,0.00979200005531311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,63,0.009509333098928133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,127,0.009488000224033991
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,127,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,255,0.009754666437705358
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,255,0.009589333087205887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,511,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,4,128,1,float16,fp8,127,0.01156266654531161
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,1023,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,1023,0.011733333269755045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,2047,0.013114667187134424
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,2047,0.013173333058754602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,4095,0.014853333433469137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,8191,0.029296000798543293
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,4095,0.013536000003417334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,8191,0.015125333021084467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,float16,16383,0.04839999973773956
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,63,0.008816000074148178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,16383,0.0313226655125618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,48,8,128,1,float16,fp8,511,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,3,0.014853333433469137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,3,0.015050667027632395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,1,0.01591466615597407
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,7,0.015072000523408255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,1,0.014869333555301031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,15,0.014874666929244995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,15,0.015658666690190632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,31,0.014762666076421738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,7,0.014826666563749313
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,31,0.014842666685581207
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,63,0.015546667079130808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,63,0.015008000036080679
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,127,0.014864000181357065
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,127,0.014885333677132925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,255,0.015439999600251516
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,255,0.015189333508412043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,511,0.03141866624355316
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,511,0.019109333554903667
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,float16,1023,0.05062933266162872
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,1,0.01905599981546402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,4,128,1,float16,fp8,1023,0.033946665624777474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,1,0.01736533393462499
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,3,0.01708799973130226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,7,0.017504000415404636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,3,0.019082666685183842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,7,0.019088000059127808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,15,0.019007999449968338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,15,0.017498667041460674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,31,0.01711999997496605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,31,0.019146667172511418
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,63,0.017258666455745697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,63,0.017125333348910015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,127,0.01926933353145917
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,127,0.017071999609470367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,255,0.030837332208951313
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,255,0.017466666797796886
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,511,0.04885333279768626
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,511,0.031888000667095184
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,float16,1023,0.08569600184758504
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,48,8,128,1,float16,fp8,1023,0.050426666935284935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,1,0.021194666624069214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,1,0.019333332777023315
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,3,0.019637333850065868
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,3,0.022309333086013794
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,7,0.021231998999913532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,7,0.019296000401178997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,31,0.021146667500336964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,15,0.019530666371186573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,15,0.02180800090233485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,31,0.019226666539907455
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,63,0.02123733361562093
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,63,0.019626667102177937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,127,0.021130666136741638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,127,0.019589333484570186
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,float16,255,0.03133333226044973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,1,0.027471999327341717
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,4,128,1,float16,fp8,255,0.01921066641807556
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,1,0.025055999557177227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,3,0.027237333357334137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,3,0.02531733363866806
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,7,0.025194667279720306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,7,0.027434666951497395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,15,0.02739199995994568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,15,0.025050667424996693
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,31,0.025450666745503742
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,63,0.0271573339899381
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,63,0.02515733242034912
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,127,0.03350933392842611
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,127,0.025045332809289295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,31,0.02737066646416982
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,float16,255,0.04977599779764811
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,48,8,128,1,float16,fp8,255,0.03345600018898646
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,float16,1,0.03436800092458725
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,fp8,1,0.02935466667016347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,float16,3,0.03444266567627589
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,fp8,3,0.029445332785447437
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,float16,7,0.03386666625738144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,fp8,7,0.029919999341169994
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,float16,15,0.034917332231998444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,fp8,15,0.02942933390537898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,float16,31,0.035232000052928925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,fp8,31,0.029722665747006733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,float16,63,0.03508266558249792
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,fp8,63,0.029487999776999157
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,float16,127,0.03716800113519033
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,4,128,1,float16,fp8,127,0.029898665845394135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,float16,1,0.048725331823031105
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,fp8,1,0.0415786678592364
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,float16,3,0.04934399823347727
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,fp8,3,0.04156800111134847
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,float16,7,0.04876266419887543
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,fp8,7,0.041706666350364685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,float16,15,0.04799466828505198
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,float16,31,0.049082666635513306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,fp8,31,0.04181333382924398
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,float16,63,0.04839999973773956
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,fp8,15,0.04223466912905375
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,1,0.009813333551088968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,1,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,3,0.009925333162148794
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,fp8,63,0.041450666884581246
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,3,0.011450666934251785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,7,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,15,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,float16,127,0.056320001681645714
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,15,0.009626666704813639
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,31,0.008890666688481966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,31,0.010597333312034607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,63,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,63,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,127,0.009546666716535887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,127,0.010677333921194077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,255,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,48,8,128,1,float16,fp8,127,0.04563733438650767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,255,0.010640000303586325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,511,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,1023,0.011866666376590729
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,511,0.011141333729028702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,2047,0.012975999464591345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,2047,0.012917333592971167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,4095,0.015205333630243937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,4095,0.015247999380032221
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,7,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,8191,0.030591999491055805
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,8191,0.017488000293572743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,float16,16383,0.048565333088239036
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,16383,0.03188266605138779
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,1,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,1,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,3,0.009365333244204521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,7,0.009285333255926767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,7,0.009632000078757605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,15,0.009392000113924345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,4,128,1,float16,fp8,1023,0.011205332974592844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,15,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,31,0.008938666433095932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,31,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,63,0.009519999846816063
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,63,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,127,0.010751999914646149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,127,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,255,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,255,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,511,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,3,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,511,0.01166933278242747
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,1023,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,2047,0.012938667088747025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,4095,0.029898665845394135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,2047,0.015520000209410986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,4095,0.014975999792416891
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,8191,0.04828799764315287
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,16383,0.08404800295829773
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,8191,0.03249066571394602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,fp8,16383,0.04952000081539154
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,48,8,128,1,float16,float16,1023,0.012768000364303589
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,float16,1,0.06195199986298879
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,float16,3,0.0621013343334198
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,fp8,3,0.05017599960168203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,fp8,1,0.05147733290990194
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,float16,7,0.0620000014702479
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,fp8,7,0.0497920016447703
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,float16,15,0.06250666578610738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,float16,31,0.06205866734186808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,fp8,31,0.04983466863632202
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,fp8,15,0.05089066425959269
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,float16,63,0.062352001667022705
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,4,128,1,float16,fp8,63,0.04978133241335551
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,float16,1,0.08602133393287659
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,float16,3,0.08539199829101562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,fp8,3,0.07494933406511943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,float16,7,0.0867039958635966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,fp8,1,0.07701866825421651
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,fp8,7,0.07681599756081899
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,float16,15,0.08602666854858398
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,fp8,15,0.07602666815121968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,fp8,31,0.07662400106589
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,float16,63,0.08547733227411906
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,fp8,63,0.07481599847475688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,48,8,128,1,float16,float16,31,0.08710933725039165
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,4,128,1,float16,float16,1,0.1135040024916331
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,4,128,1,float16,fp8,1,0.09316800038019817
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,4,128,1,float16,float16,3,0.11351999640464783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,4,128,1,float16,float16,7,0.11391466856002808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,4,128,1,float16,fp8,7,0.09302933017412822
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,4,128,1,float16,fp8,3,0.09332266449928284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,4,128,1,float16,float16,15,0.11341866850852966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,4,128,1,float16,fp8,15,0.09319466352462769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,4,128,1,float16,float16,31,0.11381866534550984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,4,128,1,float16,fp8,31,0.09269866347312927
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,8,128,1,float16,fp8,1,0.13916266957918802
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,8,128,1,float16,float16,1,0.160261332988739
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,8,128,1,float16,float16,3,0.15997333327929178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,8,128,1,float16,fp8,3,0.13949333628018698
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,8,128,1,float16,float16,7,0.16059199968973795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,8,128,1,float16,fp8,15,0.13942933082580566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,8,128,1,float16,float16,15,0.15868266423543295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,1,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,1,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,3,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,3,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,7,0.010570666442314783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,7,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,15,0.010586666564146677
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,15,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,31,0.009306666751702627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,31,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,63,0.008943999807039896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,63,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,127,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,127,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,255,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,255,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,511,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,511,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,1023,0.012960000584522883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,1023,0.012896000097195307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,8,128,1,float16,fp8,7,0.1400213340918223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,2047,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,2047,0.015279999623696009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,4095,0.030981334547201794
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,4095,0.017077332983414333
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,float16,8191,0.04994133114814758
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,4,128,1,float16,fp8,8191,0.032074667513370514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,8,128,1,float16,float16,31,0.16049066185951233
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,1,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,1,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,3,0.010602666685978571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,7,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,3,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,15,0.010591999938090643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,7,0.011343999455372492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,15,0.010618666807810465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,31,0.010656000425418219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,31,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,63,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,63,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,127,0.01145600030819575
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,255,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,48,8,128,1,float16,fp8,31,0.13929067055384317
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,255,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,511,0.012053333222866058
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,511,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,1023,0.012997332960367203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,1023,0.013616000612576803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,2047,0.02951466788848241
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,2047,0.015504000087579092
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,4095,0.047695999344189964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,4095,0.030837332208951313
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,float16,8191,0.08362666765848796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,127,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,48,8,128,1,float16,fp8,8191,0.04862933357556661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,1,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,3,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,1,0.010629333555698395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,3,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,7,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,7,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,15,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,15,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,31,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,31,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,63,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,63,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,127,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,127,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,255,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,511,0.011274666835864386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,255,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,511,0.011488000551859537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,1023,0.013167999684810638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,1023,0.012853333105643591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,2047,0.029818666477998097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,float16,4095,0.04807466765244802
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,4095,0.031717332700888314
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,1,0.011706666400035223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,1,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,3,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,3,0.011125333607196808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,7,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,7,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,15,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,15,0.011285333583752314
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,31,0.011173332730929056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,31,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,63,0.011258666714032492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,63,0.011541333049535751
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,127,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,127,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,4,128,1,float16,fp8,2047,0.01706133286158244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,255,0.011909333368142446
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,255,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,511,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,511,0.013157332936922709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,1023,0.015402667224407196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,2047,0.04828266799449921
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,1023,0.02958933264017105
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,2047,0.031712000568707786
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,1,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,1,0.00996800015370051
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,fp8,4095,0.048698668678601585
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,3,0.009296000003814697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,7,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,3,0.010368000095089277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,15,0.009445333232482275
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,15,0.011343999455372492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,31,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,31,0.010581333190202713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,63,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,127,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,63,0.011301333705584208
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,127,0.01003200002014637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,255,0.009813333551088968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,255,0.00978133330742518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,511,0.011354666203260422
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,511,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,1023,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,1023,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,2047,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,4095,0.01310933381319046
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,40,8,128,1,float16,float16,4095,0.08455999692281087
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,4095,0.012837332983811697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,8191,0.014954666296641031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,8191,0.017055999487638474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,7,0.009408000235756239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,float16,16383,0.017338667064905167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,16383,0.019002666076024372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,1,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,3,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,3,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,7,0.00966933307548364
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,7,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,15,0.00978133330742518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,4,128,1,float16,fp8,2047,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,15,0.010122666756312052
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,31,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,31,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,63,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,63,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,127,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,1,0.009248000259200731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,127,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,255,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,255,0.009328000247478485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,1023,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,1023,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,511,0.011146667102972666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,2047,0.010751999914646149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,2047,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,4095,0.013072000195582708
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,4095,0.013253333667914072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,8191,0.015029333531856537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,8191,0.01488000030318896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,16383,0.017370666066805523
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,fp8,16383,0.01720533271630605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,1,0.009029333169261614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,1,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,3,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,7,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,7,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,15,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,15,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,31,0.008965333302815756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,31,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,63,0.009423999736706415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,63,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,127,0.009429333110650381
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,127,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,255,0.00927466650803884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,255,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,511,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,511,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,1023,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,1023,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,2047,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,2047,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,4095,0.014394666999578476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,4095,0.01492799942692121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,8191,0.01544533297419548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,8191,0.015226667126019796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,16383,0.01806933308641116
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,fp8,16383,0.017312000195185345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,1,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,1,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,3,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,3,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,7,0.00890666681031386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,7,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,15,0.008805333326260248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,15,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,31,0.010037333394090334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,31,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,40,8,128,1,float16,float16,511,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,63,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,63,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,127,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,127,0.009850666547815004
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,255,0.008890666688481966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,511,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,4,128,1,float16,float16,3,0.009408000235756239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,1023,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,1023,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,2047,0.012437333663304647
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,2047,0.013050666699806849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,4095,0.014853333433469137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,4095,0.015008000036080679
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,8191,0.01516266663869222
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,8191,0.015504000087579092
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,fp8,16383,0.01762666677435239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,16383,0.032613334556420646
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,1,0.012826666235923767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,1,0.012837332983811697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,255,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,3,0.012794667234023413
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,3,0.013066666821638743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,7,0.012928000340859095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,7,0.01251199965675672
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,15,0.012831999609867731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,15,0.012805332740147909
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,31,0.012863999853531519
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,31,0.012928000340859095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,63,0.012928000340859095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,63,0.01292266696691513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,127,0.012960000584522883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,127,0.012757333616415659
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,255,0.011744000017642975
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,255,0.013082666943470636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,511,0.014965333044528961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,40,8,128,1,float16,float16,511,0.011317333827416102
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,511,0.013194666554530462
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,1023,0.031386665999889374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,float16,2047,0.04842133323351542
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,2047,0.03254399945338567
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,4,128,1,float16,fp8,1023,0.017551999539136887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,1,0.013258667041858038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,1,0.013167999684810638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,3,0.012986666212479273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,7,0.013973332941532135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,3,0.014208000153303146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,7,0.012986666212479273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,15,0.014165333161751429
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,31,0.012991999586423239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,15,0.013663999736309052
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,31,0.014090667168299357
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,63,0.013077333569526672
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,127,0.013130666067202887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,255,0.013525333255529404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,255,0.01302933320403099
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,127,0.013440000514189402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,511,0.030623999734719593
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,511,0.017082666357358296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,1023,0.0498986691236496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,1023,0.03206400076548258
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,2047,0.08558400472005208
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,1,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,fp8,2047,0.05030400057633718
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,3,0.009008000294367472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,1,0.010058666889866194
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,3,0.011418666690587997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,7,0.008965333302815756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,7,0.009450666606426239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,15,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,15,0.009248000259200731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,31,0.009445333232482275
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,63,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,63,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,31,0.01119999960064888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,127,0.009349333122372627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,127,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,255,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,255,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,511,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,511,0.011312000453472137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,1023,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,1023,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,2047,0.013541333377361298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,4095,0.015119999647140503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,8191,0.016810666769742966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,4095,0.015610666324694952
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,8191,0.0169813334941864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,float16,16383,0.03379199902216593
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,16383,0.019381333142518997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,40,8,128,1,float16,float16,63,0.014085333794355392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,1,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,3,0.008874666566650072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,3,0.010298666854699453
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,7,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,7,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,15,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,15,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,31,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,31,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,63,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,4,128,1,float16,fp8,2047,0.01321600005030632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,63,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,127,0.010165333126982054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,127,0.010133333504199982
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,255,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,255,0.009477333476146063
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,511,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,511,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,1023,0.011424000064531961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,1023,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,2047,0.013045333325862885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,1,0.010170666500926018
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,2047,0.012815999488035837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,4095,0.01525866612792015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,4095,0.01309866706530253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,8191,0.029898665845394135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,8191,0.017407999684413273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,float16,16383,0.04810666541258494
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,1,0.014954666296641031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,3,0.014991999914248785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,1,0.015642666568358738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,3,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,7,0.014997333288192749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,7,0.014864000181357065
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,40,8,128,1,float16,fp8,16383,0.030960001051425934
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,15,0.014991999914248785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,15,0.015872000406185787
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,31,0.014885333677132925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,31,0.015141333142916361
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,63,0.014837333311637243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,63,0.014896000425020853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,127,0.014970666418472925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,127,0.014970666418472925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,255,0.014869333555301031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,511,0.030943999687830608
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,511,0.019039999693632126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,255,0.015386667102575302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,float16,1023,0.04971200227737427
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,4,128,1,float16,fp8,1023,0.033413333197434746
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,1,0.017136000096797943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,1,0.018960000326236088
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,3,0.017055999487638474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,3,0.01876266673207283
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,7,0.01903466631968816
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,7,0.01692266638080279
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,15,0.018965333700180054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,31,0.0191040001809597
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,15,0.017018667111794155
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,31,0.01701333373785019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,63,0.018986667195955913
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,63,0.017029333859682083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,127,0.018144000321626663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,127,0.016970666746298473
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,255,0.03091199944416682
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,255,0.017429333180189133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,511,0.031498665610949196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,511,0.04845866560935974
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,float16,1023,0.08526933193206787
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,40,8,128,1,float16,fp8,1023,0.05073600014050802
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,1,0.02146666745344798
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,3,0.021354667842388153
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,1,0.019194666296243668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,7,0.021488000949223835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,3,0.01937599976857503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,7,0.019632000476121902
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,15,0.021189334491888683
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,15,0.018917333334684372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,31,0.019285333653291065
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,31,0.021333334346612293
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,63,0.021727999051411945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,63,0.019173332800467808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,127,0.019189332922299702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,127,0.021173333128293354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,float16,255,0.031770666440327965
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,4,128,1,float16,fp8,255,0.01929066702723503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,1,0.025631998976071674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,1,0.02743999908367793
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,3,0.025066666305065155
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,7,0.027269333600997925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,7,0.02518400053183238
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,3,0.027797333896160126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,15,0.027269333600997925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,15,0.025221332907676697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,31,0.02720000098148982
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,31,0.02521066615978877
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,63,0.02720000098148982
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,63,0.02521066615978877
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,127,0.03366400053103765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,127,0.025194667279720306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,float16,255,0.04958933095137278
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,40,8,128,1,float16,fp8,255,0.03244800120592117
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,float16,1,0.03505066782236099
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,fp8,1,0.029893333713213604
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,float16,3,0.03472000112136205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,fp8,3,0.02938133229811986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,float16,7,0.03561066587766012
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,float16,15,0.03509866694609324
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,fp8,15,0.029487999776999157
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,float16,31,0.03549333413441976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,fp8,7,0.03035199890534083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,fp8,31,0.029253333806991577
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,float16,63,0.03555200000603994
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,fp8,63,0.029578665892283123
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,float16,127,0.0378560001651446
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,float16,1,0.04795733094215393
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,4,128,1,float16,fp8,127,0.029333333174387615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,fp8,1,0.04159466673930486
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,fp8,3,0.04144000013669332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,float16,3,0.04855999847253164
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,float16,7,0.047839999198913574
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,fp8,7,0.04154666761557261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,float16,15,0.04770133395989736
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,fp8,15,0.04119466741879781
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,float16,31,0.04790399968624115
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,fp8,31,0.041493333876132965
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,float16,63,0.04778666794300079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,fp8,63,0.04160533348719279
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,1,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,1,0.00983466642598311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,3,0.009690666571259499
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,3,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,7,0.009503999724984169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,7,0.009525333220760027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,15,0.009488000224033991
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,15,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,31,0.008938666433095932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,31,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,63,0.00878399983048439
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,63,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,127,0.009375999992092451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,127,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,255,0.008842666943868002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,255,0.00961599995692571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,511,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,511,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,1023,0.011183999478816986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,1023,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,2047,0.013167999684810638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,2047,0.01303999995191892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,float16,127,0.05658666789531708
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,4095,0.015205333630243937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,4095,0.015024000157912573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,8191,0.01718933383623759
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,8191,0.03172266731659571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,float16,16383,0.0484799991051356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,40,8,128,1,float16,fp8,127,0.045781334241231285
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,4,128,1,float16,fp8,16383,0.03254933406909307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,1,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,1,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,3,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,3,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,7,0.009413333609700203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,7,0.009482666850090027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,15,0.011402666568756104
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,15,0.009872000043590864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,31,0.009679999823371569
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,63,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,63,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,127,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,255,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,127,0.01128000020980835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,255,0.010597333312034607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,511,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,1023,0.012815999488035837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,1023,0.012784000486135483
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,2047,0.01509333277742068
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,2047,0.0129120002190272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,4095,0.016352000335852306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,4095,0.03028800090154012
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,8191,0.04808000226815542
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,8191,0.03209600100914637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,31,0.009466666728258133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,float16,16383,0.08326933284600575
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,511,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,40,8,128,1,float16,fp8,16383,0.0498986691236496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,fp8,1,0.05016533533732096
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,float16,3,0.0625493327776591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,float16,1,0.06237866481145223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,float16,7,0.06242666641871134
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,fp8,3,0.05007466673851013
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,fp8,7,0.049738665421803795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,float16,15,0.06250666578610738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,fp8,15,0.04993066688378652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,float16,31,0.06204266846179962
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,float16,63,0.06272533535957336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,fp8,31,0.04993600149949392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,4,128,1,float16,fp8,63,0.050373335679372154
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,float16,1,0.08471999565760295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,float16,3,0.08620799581209819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,fp8,3,0.07572799921035767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,float16,7,0.08577600121498108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,fp8,1,0.07479999959468842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,fp8,15,0.07632533212502797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,float16,15,0.08565866947174072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,fp8,7,0.07658666869004567
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,float16,31,0.08487466971079509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,fp8,31,0.0746666689713796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,float16,63,0.08514133095741272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,40,8,128,1,float16,fp8,63,0.07562133173147838
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,4,128,1,float16,fp8,1,0.09289600451787312
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,4,128,1,float16,float16,3,0.11384532848993938
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,4,128,1,float16,fp8,3,0.09334933757781982
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,4,128,1,float16,float16,7,0.11352533102035522
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,4,128,1,float16,fp8,7,0.09275733431180318
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,4,128,1,float16,float16,15,0.11433600385983785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,4,128,1,float16,float16,1,0.11588266491889954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,4,128,1,float16,fp8,15,0.09277333815892537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,4,128,1,float16,float16,31,0.11333333452542622
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,4,128,1,float16,fp8,31,0.09340266386667888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,8,128,1,float16,float16,1,0.15896000464757284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,8,128,1,float16,fp8,1,0.13820800185203552
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,8,128,1,float16,float16,3,0.15895467003186545
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,8,128,1,float16,fp8,3,0.14017066359519958
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,8,128,1,float16,float16,7,0.1593226691087087
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,8,128,1,float16,fp8,7,0.13991467157999674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,8,128,1,float16,float16,15,0.1585653324921926
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,1,0.009253333633144697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,1,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,3,0.009296000003814697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,3,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,7,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,7,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,8,128,1,float16,fp8,15,0.14012799660364786
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,15,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,31,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,15,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,31,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,63,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,63,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,127,0.010677333921194077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,127,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,255,0.009455999980370203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,255,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,511,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,511,0.011146667102972666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,1023,0.01313599944114685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,1023,0.013002666334311167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,2047,0.015360000232855478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,2047,0.014954666296641031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,4095,0.03154666721820831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,4095,0.017498667041460674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,float16,8191,0.049551998575528465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,4,128,1,float16,fp8,8191,0.0322773332397143
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,1,0.010522666076819101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,1,0.010661333799362183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,3,0.010709332923094431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,3,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,7,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,7,0.01033599985142549
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,8,128,1,float16,float16,31,0.15983466307322183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,15,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,15,0.011525332927703857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,31,0.011125333607196808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,31,0.011359999577204386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,63,0.010677333921194077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,40,8,128,1,float16,fp8,31,0.1392159958680471
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,63,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,127,0.01129066695769628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,127,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,255,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,255,0.010703999549150467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,511,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,511,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,1023,0.013221333424250284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,1023,0.012773333738247553
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,2047,0.029946667452653248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,2047,0.014949332922697067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,4095,0.04747733473777771
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,4095,0.03051200012365977
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,float16,8191,0.08339732885360718
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,40,8,128,1,float16,fp8,8191,0.048613334695498146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,1,0.009904000287254652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,3,0.00983466642598311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,1,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,7,0.010490667074918747
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,7,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,15,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,15,0.0103946669648091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,31,0.009712000067035357
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,31,0.011231999844312668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,63,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,63,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,127,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,127,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,255,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,255,0.010709332923094431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,511,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,511,0.013125333935022354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,1023,0.013114667187134424
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,1023,0.013114667187134424
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,2047,0.015082667271296183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,2047,0.014981333166360855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,4095,0.031680000325044
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,3,0.010245333115259806
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,4095,0.018458666900793713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,float16,8191,0.04975999891757965
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,2,128,1,float16,fp8,8191,0.03312533348798752
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,1,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,3,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,3,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,7,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,7,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,15,0.010591999938090643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,15,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,31,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,31,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,63,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,63,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,127,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,127,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,255,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,255,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,511,0.011109333485364914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,511,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,1,0.009578666960199675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,1023,0.013157332936922709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,1023,0.012768000364303589
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,2047,0.029535998900731403
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,2047,0.015274666249752045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,4095,0.04818133513132731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,4095,0.030432000756263733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,float16,8191,0.08285866677761078
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,4,128,1,float16,fp8,8191,0.049216002225875854
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,1,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,1,0.011301333705584208
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,3,0.01118933285276095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,3,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,7,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,7,0.011365332951148352
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,15,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,15,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,31,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,31,0.011413333316644033
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,63,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,63,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,127,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,127,0.011920000116030375
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,255,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,255,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,511,0.013007999708255133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,511,0.015141333142916361
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,1023,0.029989334444204967
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,1023,0.015135999768972397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,2047,0.03126933425664902
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,4095,0.08457600076993306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,4095,0.04889066517353058
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,8191,0.1566986640294393
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,fp8,8191,0.0860746701558431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,1,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,3,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,3,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,7,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,7,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,15,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,15,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,31,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,31,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,63,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,63,0.00949866697192192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,127,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,127,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,255,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,32,8,128,1,float16,float16,2047,0.04841599861780802
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,511,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,1023,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,511,0.01129066695769628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,1023,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,2047,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,2047,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,4095,0.011594666788975397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,4095,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,8191,0.015311999867359797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,8191,0.015034666905800501
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,16383,0.02075733368595441
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,1,0.010234666367371878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,16383,0.019205333044131596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,float16,32767,0.023311999936898548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,32767,0.021354667842388153
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,1,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,1,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,3,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,3,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,7,0.00972800018886725
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,7,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,15,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,15,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,31,0.008826666822036108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,63,0.009578666960199675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,63,0.009242666885256767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,127,0.010405333091815313
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,127,0.009301333377758661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,2,128,1,float16,fp8,255,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,255,0.010079999764760336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,255,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,511,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,511,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,1023,0.009418666362762451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,1023,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,2047,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,2047,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,4095,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,4095,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,8191,0.015135999768972397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,16383,0.017632000148296356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,16383,0.016917333006858826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,float16,32767,0.019493332753578823
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,32767,0.01754666616519292
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,1,0.00949866697192192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,1,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,3,0.008896000062425932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,31,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,7,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,7,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,15,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,15,0.010522666076819101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,31,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,31,0.01051733394463857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,63,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,63,0.01055466632048289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,127,0.00903466654320558
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,4,128,1,float16,fp8,8191,0.015189333508412043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,127,0.009397333487868309
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,255,0.008901333436369896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,255,0.009872000043590864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,511,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,511,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,1023,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,2047,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,2047,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,4095,0.012869333227475485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,4095,0.012869333227475485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,8191,0.014991999914248785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,8191,0.014794666320085526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,16383,0.016970666746298473
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,16383,0.01720533271630605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,32767,0.03339199970165888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,32767,0.01740266631046931
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,1,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,1,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,3,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,3,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,fp8,1023,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,7,0.00891733355820179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,7,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,32,8,128,1,float16,float16,3,0.009402666861812273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,15,0.00960533320903778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,31,0.009519999846816063
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,63,0.010666667173306147
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,63,0.010319999729593595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,127,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,127,0.010304000228643417
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,255,0.010645333677530289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,511,0.011237333218256632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,511,0.011215999722480774
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,1023,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,1023,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,2047,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,2047,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,4095,0.011253333340088526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,15,0.009658666948477427
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,4095,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,8191,0.016858667135238647
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,8191,0.016837333639462788
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,16383,0.019120000302791595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,32767,0.02144533395767212
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,32767,0.020917333662509918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,255,0.010026666646202406
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,1,0.009733333562811216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,1,0.00901333304742972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,3,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,3,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,7,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,7,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,15,0.008842666943868002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,15,0.008986666798591614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,31,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,31,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,63,0.00891733355820179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,63,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,127,0.008997333546479544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,127,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,255,0.009242666885256767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,fp8,31,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,255,0.008837333569924036
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,511,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,511,0.010656000425418219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,1023,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,1023,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,2047,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,4095,0.013050666699806849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,4095,0.013130666067202887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,8191,0.01522133375207583
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,8191,0.015130666395028433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,16383,0.017418666432301205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,float16,32767,0.03229333211978277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,32767,0.018677332748969395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,1,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,1,0.009813333551088968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,3,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,2,128,1,float16,float16,16383,0.01905599981546402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,3,0.010703999549150467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,7,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,7,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,2047,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,15,0.008799999952316284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,15,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,31,0.009002666920423508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,31,0.008842666943868002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,63,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,4,128,1,float16,fp8,16383,0.016864000509182613
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,127,0.008826666822036108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,127,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,255,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,255,0.009253333633144697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,511,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,511,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,1023,0.010480000327030817
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,1023,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,2047,0.012896000097195307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,2047,0.013056000073750814
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,4095,0.015008000036080679
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,4095,0.013397333522637686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,8191,0.015909332782030106
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,8191,0.014901333798964819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,16383,0.031717332700888314
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,16383,0.017375999440749485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,32767,0.049738665421803795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,fp8,32767,0.03357866654793421
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,1,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,1,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,3,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,32,8,128,1,float16,float16,63,0.009626666704813639
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,3,0.010538666198650995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,7,0.010112000008424124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,7,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,15,0.01062400018175443
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,15,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,31,0.009914666414260864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,63,0.010512000570694605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,63,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,127,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,127,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,255,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,255,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,511,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,511,0.011317333827416102
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,1023,0.01301866645614306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,31,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,2047,0.030229332546393078
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,1023,0.01357866699496905
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,2047,0.01708799973130226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,float16,4095,0.04938666522502899
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,2,128,1,float16,fp8,4095,0.03128000100453695
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,1,0.01145600030819575
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,3,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,7,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,7,0.011125333607196808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,15,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,15,0.011146667102972666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,31,0.01145600030819575
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,31,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,63,0.0124746672809124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,1,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,63,0.011418666690587997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,127,0.011989332735538483
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,127,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,255,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,255,0.011343999455372492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,511,0.014767999450365702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,511,0.013056000073750814
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,1023,0.030224000414212544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,1023,0.015072000523408255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,2047,0.048672000567118325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,3,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,2047,0.031199999153614044
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,float16,4095,0.08473599950472514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,4,128,1,float16,fp8,4095,0.049866666396458946
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,1,0.012885333349307379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,1,0.013173333058754602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,3,0.012970666090647379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,3,0.013552000125249227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,7,0.012906666845083237
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,7,0.013093333691358566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,15,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,15,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,31,0.013157332936922709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,31,0.013359999905029932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,63,0.01482133318980535
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,63,0.012885333349307379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,127,0.0129120002190272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,127,0.01339200014869372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,255,0.01492799942692121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,255,0.012960000584522883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,511,0.02991466720898946
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,1023,0.049546668926874794
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,1023,0.03134933362404505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,511,0.016821333517630894
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,2047,0.08609066406885783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,2047,0.05101333558559418
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,1,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,float16,4095,0.1588586668173472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,3,0.00938666673998038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,1,0.009535999968647957
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,32,8,128,1,float16,fp8,4095,0.0857919951279958
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,7,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,15,0.011653333902359009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,15,0.009999999776482582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,31,0.010677333921194077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,31,0.009877333417534828
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,63,0.010640000303586325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,63,0.010431999961535135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,127,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,255,0.010650667051474253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,255,0.010650667051474253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,511,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,511,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,1023,0.011173332730929056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,1023,0.010527999450763067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,2047,0.011391999820868174
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,3,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,4095,0.014906667172908783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,2047,0.011877333124478659
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,4095,0.014991999914248785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,8191,0.017093333105246227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,8191,0.016597333053747814
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,16383,0.018800000349680584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,127,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,float16,32767,0.03418133407831192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,32767,0.02013333390156428
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,1,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,3,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,3,0.0106133334338665
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,7,0.00961599995692571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,7,0.010319999729593595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,7,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,15,0.010117333382368088
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,15,0.010117333382368088
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,31,0.008965333302815756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,2,128,1,float16,fp8,16383,0.01732800031701724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,63,0.008938666433095932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,1,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,63,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,127,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,127,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,255,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,511,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,511,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,1023,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,1023,0.011674666156371435
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,2047,0.012719999998807907
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,2047,0.013082666943470636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,4095,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,31,0.009824000298976898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,4095,0.012997332960367203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,8191,0.015429332852363586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,16383,0.032058666149775185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,16383,0.0170666662355264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,32767,0.03345600018898646
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,float16,32767,0.05009066561857859
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,1,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,255,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,3,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,3,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,7,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,7,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,15,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,15,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,31,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,31,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,63,0.009610666582981745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,63,0.01028266673286756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,127,0.00983466642598311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,127,0.00996800015370051
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,4,128,1,float16,fp8,8191,0.015168000012636185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,255,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,511,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,511,0.011152000476916632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,1,0.008885333314538002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,1023,0.011221333096424738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,1023,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,2047,0.012586666891972223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,2047,0.012954667210578918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,4095,0.014688000082969666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,4095,0.0129120002190272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,8191,0.029968000948429108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,8191,0.015749332805474598
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,16383,0.0481333335240682
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,16383,0.031898667414983116
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,float16,32767,0.08438400427500407
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,32767,0.04886933167775472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,1,0.012703999876976013
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,1,0.012789333860079447
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,3,0.013557333499193192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,32,8,128,1,float16,fp8,255,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,3,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,7,0.012869333227475485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,15,0.012858666479587555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,15,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,31,0.012821332861979803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,31,0.011941333611806234
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,63,0.012730666746695837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,63,0.012842666357755661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,7,0.012890666723251343
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,127,0.012362666428089142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,127,0.013301332791646322
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,255,0.012901333471139273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,255,0.01301866645614306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,511,0.015376000354687372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,511,0.01505600040157636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,1023,0.031471999982992806
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,1023,0.0176959993938605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,float16,2047,0.05054399867852529
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,2,128,1,float16,fp8,2047,0.033088001112143196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,1,0.013280000537633896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,1,0.01394133393963178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,7,0.013749333719412485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,7,0.013823999712864557
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,3,0.013834666460752487
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,3,0.013258667041858038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,15,0.013936000565687815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,15,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,31,0.013861333330472311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,31,0.013370666652917862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,63,0.014752000570297241
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,63,0.013189333180586496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,127,0.0145066666106383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,255,0.013562666873137156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,255,0.01314666618903478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,511,0.031221332649389904
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,511,0.016938666502634685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,1023,0.0317546675602595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,1023,0.049402669072151184
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,float16,2047,0.08619200189908345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,127,0.013306666165590286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,4,128,1,float16,fp8,2047,0.05118933320045471
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,1,0.018901333212852478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,1,0.017077332983414333
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,3,0.01905599981546402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,3,0.017162666966517765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,7,0.018911999960740406
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,7,0.01735466718673706
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,15,0.01924266666173935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,15,0.01714666684468587
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,31,0.018992000569899876
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,31,0.017509333789348602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,63,0.019141333798567455
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,63,0.017701332767804463
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,127,0.017429333180189133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,127,0.019237333287795384
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,255,0.03033600002527237
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,255,0.017551999539136887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,511,0.031957333286603294
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,1023,0.08468266328175862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,2047,0.08569600184758504
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,fp8,1023,0.04961066444714864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,2047,0.15595733126004538
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,32,8,128,1,float16,float16,511,0.049253334601720176
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,1,0.014848000059525171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,1,0.015066667149464289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,3,0.015306666493415833
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,7,0.014912000546852747
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,3,0.014906667172908783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,7,0.014997333288192749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,15,0.015157333264748255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,15,0.01516266663869222
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,31,0.01509333277742068
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,31,0.014858666807413101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,63,0.015813333292802174
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,63,0.015050667027632395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,127,0.015615999698638916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,127,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,255,0.015450666348139444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,255,0.014938666174809137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,fp8,511,0.019194666296243668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,2,128,1,float16,float16,511,0.0315786674618721
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,1,0.01918399954835574
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,1,0.017152000218629837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,3,0.018496000518401463
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,3,0.017317333569129307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,7,0.019039999693632126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,7,0.017077332983414333
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,15,0.01848000039656957
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,15,0.017269333203633625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,31,0.019023999571800232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,31,0.017162666966517765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,63,0.019120000302791595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,63,0.017338667064905167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,127,0.01933866615096728
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,127,0.01709866647919019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,255,0.030266667405764263
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,255,0.017221332838137943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,float16,511,0.0498986691236496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,4,128,1,float16,fp8,511,0.032144000132878624
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,1,0.027237333357334137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,1,0.0252960001428922
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,7,0.027376001079877216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,7,0.02514133354028066
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,3,0.023503998915354412
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,3,0.02756800005833308
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,15,0.02735466758410136
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,15,0.025360000630219776
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,31,0.027263998985290527
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,31,0.025045332809289295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,63,0.027349332968393963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,63,0.025077333052953083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,127,0.032405334214369454
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,127,0.02534399926662445
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,255,0.05035200218359629
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,255,0.03294933338960012
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,float16,511,0.08503466844558716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,32,8,128,1,float16,fp8,511,0.05043200155099233
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,float16,1,0.021301334102948506
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,fp8,1,0.019306667149066925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,float16,3,0.021530665457248688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,fp8,3,0.01905599981546402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,float16,7,0.02147199958562851
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,fp8,7,0.019205333044131596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,float16,15,0.021333334346612293
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,fp8,15,0.019386666516462963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,float16,31,0.021226666867733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,fp8,31,0.01930133377512296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,float16,63,0.021365332106749218
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,fp8,63,0.019359999646743137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,float16,127,0.021375998854637146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,fp8,127,0.01937066639463107
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,float16,255,0.03164266546567281
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,2,128,1,float16,fp8,255,0.01932799940307935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,float16,1,0.02735466758410136
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,fp8,1,0.026357332865397137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,float16,3,0.027893332143624622
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,fp8,3,0.025648000339667004
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,float16,7,0.02920000006755193
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,fp8,7,0.02703999976317088
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,float16,15,0.02773866554101308
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,fp8,15,0.025968000292778015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,float16,31,0.02922666569550832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,fp8,31,0.027237333357334137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,float16,63,0.02752000093460083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,fp8,63,0.025626666843891144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,float16,127,0.035989334185918175
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,fp8,127,0.026234666506449383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,float16,255,0.052095999320348106
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,float16,1,0.04827733337879181
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,fp8,1,0.040005333721637726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,4,128,1,float16,fp8,255,0.036330667634805046
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,float16,3,0.047983999053637184
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,fp8,3,0.040037333965301514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,float16,7,0.04779199759165446
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,fp8,7,0.041221333046754204
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,float16,15,0.04821866750717163
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,fp8,15,0.04005866746107737
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,float16,31,0.04809066653251648
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,fp8,31,0.04082666585842768
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,float16,63,0.0481279989083608
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,fp8,63,0.039877332746982574
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,float16,127,0.05569066603978475
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,1,0.00891733355820179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,1,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,3,0.008901333436369896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,3,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,7,0.009461333354314169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,7,0.010197333370645842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,fp8,127,0.04477333525816599
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,15,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,15,0.01033599985142549
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,31,0.010277333358923594
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,31,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,63,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,63,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,127,0.009232000137368837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,127,0.011205332974592844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,255,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,255,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,float16,255,0.08798399567604065
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,511,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,32,8,128,1,float16,fp8,255,0.05619733532269796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,511,0.011445333560307821
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,1023,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,2047,0.014725333700577417
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,4095,0.015141333142916361
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,4095,0.01640533283352852
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,1023,0.011109333485364914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,8191,0.01709866647919019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,2047,0.01333333303531011
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,8191,0.01701333373785019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,16383,0.03331200033426285
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,16383,0.019424000134070713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,float16,32767,0.05162666738033295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,1,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,1,0.009813333551088968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,3,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,7,0.008799999952316284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,7,0.01051733394463857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,15,0.00897066667675972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,15,0.010064000263810158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,31,0.00874133345981439
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,31,0.010581333190202713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,63,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,63,0.009898666913310686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,127,0.008816000074148178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,127,0.010725333044926325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,255,0.008805333326260248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,255,0.010288000106811523
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,511,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,511,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,1023,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,1023,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,2047,0.012250666817029318
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,2047,0.01129066695769628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,2,128,1,float16,fp8,32767,0.03417066733042399
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,4095,0.012938667088747025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,4095,0.013557333499193192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,3,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,8191,0.03019733230272929
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,8191,0.015157333264748255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,16383,0.04799999793370565
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,float16,32767,0.0844533344109853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,32767,0.0499893327554067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,1,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,1,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,3,0.00890666681031386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,3,0.010575999816258749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,7,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,15,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,15,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,31,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,31,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,63,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,63,0.010703999549150467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,127,0.009461333354314169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,127,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,255,0.008874666566650072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,255,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,4,128,1,float16,fp8,16383,0.032186667124430336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,511,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,511,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,1023,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,1023,0.012815999488035837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,2047,0.01313599944114685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,2047,0.01302933320403099
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,7,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,4095,0.015386667102575302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,4095,0.029968000948429108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,8191,0.031199999153614044
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,8191,0.04891733328501383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,16383,0.08462400237719218
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,16383,0.049039999643961586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,fp8,32767,0.08508800466855367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,32,8,128,1,float16,float16,32767,0.15646933515866598
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,float16,1,0.0335413341720899
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,fp8,1,0.029680001238981884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,fp8,3,0.029663999875386555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,float16,3,0.03568533311287562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,float16,7,0.034160000582536064
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,fp8,7,0.02959999938805898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,float16,15,0.034671999514102936
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,float16,31,0.03559466699759165
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,fp8,31,0.02951466788848241
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,float16,63,0.03454933315515518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,fp8,15,0.02975466599067052
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,fp8,63,0.029530666768550873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,float16,127,0.03805333375930786
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,2,128,1,float16,fp8,127,0.030069333811601002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,float16,1,0.05046399931112925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,float16,3,0.05065066615740458
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,fp8,3,0.042405332128206887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,fp8,1,0.04398933549722036
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,float16,7,0.05052266518274943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,fp8,7,0.04221866528193156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,float16,15,0.05062933266162872
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,float16,31,0.050197333097457886
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,fp8,31,0.04417066772778829
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,fp8,15,0.04388799766699473
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,float16,63,0.05021866659323374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,fp8,63,0.041984001795450844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,float16,127,0.0582239975531896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,4,128,1,float16,fp8,127,0.0479360024134318
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,float16,1,0.0848640004793803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,fp8,1,0.07434666653474171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,float16,3,0.08637866377830505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,fp8,3,0.07448000212510426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,fp8,7,0.07430399954319
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,float16,15,0.08582400282224019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,float16,7,0.08578133583068848
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,fp8,15,0.07436266541481018
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,float16,31,0.08542399605115254
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,fp8,31,0.07468266785144806
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,fp8,63,0.07461333274841309
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,float16,63,0.08665600419044495
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,float16,127,0.09893332918485005
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,32,8,128,1,float16,fp8,127,0.07874666651089986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,float16,1,0.06277333199977875
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,float16,3,0.062074666221936546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,fp8,3,0.050853331883748375
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,fp8,1,0.052229334910710655
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,float16,7,0.06238399942715963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,fp8,7,0.05143466591835022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,float16,15,0.06235733131567637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,fp8,15,0.05159999926884969
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,float16,31,0.0622026671965917
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,fp8,31,0.05006400247414907
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,float16,63,0.06257066627343495
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,float16,1,0.08915733297665913
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,2,128,1,float16,fp8,63,0.05004799862702688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,fp8,1,0.08061333497365315
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,float16,3,0.09083732962608337
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,fp8,3,0.08060266574223836
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,float16,7,0.09065066774686177
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,fp8,7,0.08110933502515157
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,float16,15,0.09000533819198608
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,fp8,15,0.08075200021266937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,float16,31,0.09129599730173747
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,fp8,31,0.08130666613578796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,float16,1,0.15968533356984457
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,fp8,63,0.08080000181992848
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,4,128,1,float16,float16,63,0.09051199754079182
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,fp8,1,0.13614400227864584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,float16,3,0.15888532996177673
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,fp8,3,0.13802133003870645
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,float16,7,0.1590986649195353
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,fp8,7,0.1372266709804535
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,float16,15,0.15837333599726358
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,fp8,15,0.1362933317820231
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,float16,31,0.1601653297742208
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,1,0.00997866690158844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,1,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,3,0.010618666807810465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,3,0.01055466632048289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,7,0.009434666484594345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,7,0.010384000216921171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,15,0.009365333244204521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,15,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,31,0.009557333464423815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,31,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,63,0.009541333342591921
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,fp8,31,0.13638933499654135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,127,0.010496000448862711
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,63,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,127,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,255,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,255,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,511,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,511,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,1023,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,1023,0.012639999389648438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,2047,0.01309866706530253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,2047,0.013493333011865616
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,4095,0.015024000157912573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,4095,0.01544533297419548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,8191,0.03146133323510488
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,8191,0.018005333840847015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,float16,16383,0.049914668003718056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,float16,63,0.15898133317629495
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,2,128,1,float16,fp8,16383,0.03365866591533025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,1,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,1,0.009503999724984169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,3,0.008890666688481966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,7,0.009328000247478485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,7,0.00984533317387104
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,3,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,15,0.009455999980370203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,15,0.009525333220760027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,31,0.009578666960199675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,31,0.009621333330869675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,63,0.00891733355820179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,63,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,127,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,127,0.009632000078757605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,255,0.009722666814923286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,255,0.008767999708652496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,511,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,511,0.011359999577204386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,1023,0.011482667177915573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,1023,0.01192533348997434
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,2047,0.01331199953953425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,2047,0.013082666943470636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,4095,0.030693332354227703
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,4095,0.015594666202863058
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,8191,0.048165331284205117
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,8191,0.03183999905983607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,float16,16383,0.08475200335184734
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,4,128,1,float16,fp8,16383,0.050399998823801674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,1,0.010725333044926325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,1,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,3,0.01071999967098236
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,3,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,7,0.0100853331387043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,7,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,15,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,32,8,128,1,float16,fp8,63,0.13807466626167297
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,15,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,31,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,31,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,63,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,63,0.009941333283980688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,127,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,127,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,255,0.010026666646202406
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,511,0.011141333729028702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,1023,0.013274667163689932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,255,0.011274666835864386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,1023,0.013514666507641474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,2047,0.029482667644818623
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,4095,0.04866133133570353
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,2047,0.015578666081031164
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,4095,0.030058667063713074
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,8191,0.08357333143552144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,8191,0.04909333089987437
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,float16,16383,0.15506133437156677
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,16383,0.0841493308544159
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,1,0.009839999799927076
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,1,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,3,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,3,0.010735999792814255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,7,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,7,0.011237333218256632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,32,8,128,1,float16,fp8,511,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,15,0.009322666873534521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,15,0.011381333072980246
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,31,0.010570666442314783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,31,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,63,0.009786666681369146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,63,0.011760000139474869
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,127,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,127,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,255,0.009450666606426239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,255,0.010634666929642359
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,511,0.011488000551859537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,511,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,1023,0.01309866706530253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,1023,0.012831999609867731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,2047,0.015013333410024643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,2047,0.01509333277742068
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,4095,0.03164800008138021
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,4095,0.017690667261679966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,fp8,8191,0.032511999209721885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,2,128,1,float16,float16,8191,0.05014933149019877
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,1,0.010005333150426546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,1,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,3,0.010661333799362183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,3,0.010389333590865135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,7,0.010666667173306147
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,7,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,15,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,15,0.01055466632048289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,31,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,63,0.01062400018175443
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,63,0.010533332824707031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,127,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,127,0.010496000448862711
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,255,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,255,0.010549332946538925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,511,0.011146667102972666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,511,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,1023,0.01303999995191892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,1023,0.012991999586423239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,2047,0.029232000311215717
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,2047,0.015013333410024643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,31,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,4095,0.048021331429481506
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,4095,0.030778666337331135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,fp8,8191,0.04884799818197886
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,1,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,1,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,3,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,4,128,1,float16,float16,8191,0.08363733688990276
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,3,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,7,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,7,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,15,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,15,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,31,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,31,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,63,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,127,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,127,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,255,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,255,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,511,0.013248000293970108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,511,0.013477332890033722
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,1023,0.02924799919128418
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,1023,0.015184000134468079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,2047,0.030666666726271313
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,2047,0.04795733094215393
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,4095,0.08409600456555684
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,4095,0.04902400076389313
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,float16,8191,0.15678399801254272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,1,0.00877333308259646
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,8191,0.08543466528256734
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,3,0.008816000074148178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,1,0.010666667173306147
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,3,0.010186666622757912
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,24,8,128,1,float16,fp8,63,0.011152000476916632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,7,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,15,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,31,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,15,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,31,0.00960533320903778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,63,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,63,0.010634666929642359
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,127,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,127,0.009888000165422758
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,255,0.010640000303586325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,255,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,511,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,511,0.011109333485364914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,1023,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,1023,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,2047,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,2047,0.010581333190202713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,4095,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,4095,0.01121066634853681
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,8191,0.01524266724785169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,16383,0.019738666713237762
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,16383,0.01932799940307935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,32767,0.024362665911515553
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,32767,0.021104000508785248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,1,0.01003200002014637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,1,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,3,0.0102186668664217
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,3,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,7,0.010112000008424124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,7,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,float16,7,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,15,0.009450666606426239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,15,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,31,0.008778666456540426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,31,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,63,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,63,0.009375999992092451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,127,0.009877333417534828
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,127,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,255,0.008629333227872849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,255,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,511,0.01044800008336703
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,511,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,1023,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,1023,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,2047,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,2047,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,2,128,1,float16,fp8,8191,0.016490666816631954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,4095,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,4095,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,8191,0.015344000111023584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,8191,0.01498666654030482
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,16383,0.017386666188637417
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,16383,0.017360000560681026
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,float16,32767,0.019248000035683315
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,4,128,1,float16,fp8,32767,0.018976000448067982
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,1,0.008826666822036108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,3,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,3,0.008976000050703684
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,7,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,7,0.008901333436369896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,15,0.008842666943868002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,15,0.010629333555698395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,31,0.009008000294367472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,31,0.010186666622757912
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,63,0.00891733355820179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,127,0.0100426667680343
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,127,0.008816000074148178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,255,0.010064000263810158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,255,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,511,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,1023,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,1,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,1023,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,2047,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,2047,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,4095,0.012810666114091873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,4095,0.013173333058754602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,8191,0.014943999548753103
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,8191,0.014922666052977243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,16383,0.01695999999841054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,16383,0.016949333250522614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,32767,0.033717334270477295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,63,0.009632000078757605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,1,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,1,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,3,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,3,0.00973866693675518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,7,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,float16,511,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,15,0.008762666955590248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,31,0.00878399983048439
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,31,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,63,0.008799999952316284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,63,0.008976000050703684
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,127,0.00879466657837232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,127,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,255,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,255,0.009962666779756546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,24,8,128,1,float16,fp8,32767,0.01736533393462499
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,511,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,1023,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,1023,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,2047,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,7,0.009839999799927076
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,2047,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,4095,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,15,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,4095,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,8191,0.01588800052801768
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,8191,0.01703466723362605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,16383,0.017029333859682083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,16383,0.01905599981546402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,float16,32767,0.021333334346612293
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,32767,0.01926400015751521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,1,0.009285333255926767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,1,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,3,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,3,0.010026666646202406
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,7,0.010133333504199982
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,15,0.008933333059151968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,15,0.009786666681369146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,31,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,31,0.010106666634480158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,63,0.009248000259200731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,63,0.013434667140245438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,2,128,1,float16,fp8,511,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,127,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,255,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,127,0.013562666873137156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,511,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,1023,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,511,0.012970666090647379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,1023,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,2047,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,2047,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,4095,0.013295999417702356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,4095,0.012970666090647379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,8191,0.015061333775520325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,8191,0.014981333166360855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,7,0.008997333546479544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,16383,0.017029333859682083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,32767,0.03362133353948593
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,32767,0.01836266616980235
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,1,0.008901333436369896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,1,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,fp8,255,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,3,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,3,0.010026666646202406
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,7,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,7,0.010277333358923594
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,15,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,15,0.010522666076819101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,31,0.008933333059151968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,31,0.010245333115259806
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,63,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,4,128,1,float16,float16,16383,0.01701333373785019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,63,0.009466666728258133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,127,0.009002666920423508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,127,0.010373333469033241
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,255,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,255,0.009743999689817429
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,511,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,511,0.011306667079528173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,1023,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,1023,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,2047,0.012837332983811697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,2047,0.013077333569526672
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,4095,0.013141332815090815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,8191,0.014970666418472925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,8191,0.01534933348496755
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,16383,0.032111999889214836
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,16383,0.017162666966517765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,32767,0.050623998045921326
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,1,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,1,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,3,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,3,0.010677333921194077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,7,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,float16,4095,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,15,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,7,0.012362666428089142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,15,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,31,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,24,8,128,1,float16,fp8,32767,0.03345066557327906
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,31,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,63,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,63,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,127,0.010175999874869982
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,127,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,255,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,255,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,511,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,511,0.012896000097195307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,1023,0.013050666699806849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,1023,0.012981332838535309
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,2047,0.03028800090154012
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,float16,4095,0.04882133503754934
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,4095,0.03142933299144109
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,2,128,1,float16,fp8,2047,0.017498667041460674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,1,0.011557333171367645
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,1,0.011322667201360067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,3,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,7,0.011407999942700068
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,3,0.01119999960064888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,7,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,15,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,15,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,31,0.01192533348997434
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,31,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,63,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,63,0.011221333096424738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,127,0.011637333780527115
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,127,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,255,0.011125333607196808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,255,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,511,0.01292266696691513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,511,0.013381333400805792
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,1023,0.029690665503342945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,1023,0.01591466615597407
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,2047,0.048997332652409874
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,2047,0.03158933420976003
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,float16,4095,0.08442667126655579
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,4,128,1,float16,fp8,4095,0.05006400247414907
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,1,0.012954667210578918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,1,0.01321600005030632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,3,0.012970666090647379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,3,0.013141332815090815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,7,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,7,0.013178666432698568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,15,0.013013333082199097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,31,0.012944000462690989
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,31,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,63,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,63,0.012965332716703415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,127,0.014533333480358124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,127,0.012805332740147909
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,255,0.013141332815090815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,255,0.01303999995191892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,511,0.03014400104681651
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,1023,0.04884799818197886
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,15,0.012938667088747025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,1023,0.03137599925200144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,2047,0.08604799707730611
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,2047,0.049546668926874794
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,float16,4095,0.15842666228612265
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,4095,0.08601599931716919
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,1,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,1,0.010453333457310995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,3,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,3,0.010725333044926325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,24,8,128,1,float16,fp8,511,0.01701333373785019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,7,0.009322666873534521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,7,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,15,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,15,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,31,0.00949866697192192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,31,0.00933333362142245
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,63,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,63,0.009408000235756239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,127,0.010165333126982054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,127,0.01116266722480456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,255,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,255,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,511,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,511,0.011296000331640244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,1023,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,2047,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,1023,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,2047,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,4095,0.014831999937693277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,4095,0.01504533365368843
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,8191,0.015466666469971338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,16383,0.016997333616018295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,16383,0.01748266691962878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,fp8,32767,0.019199999670187633
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,1,0.008965333302815756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,1,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,3,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,3,0.00890666681031386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,7,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,7,0.008986666798591614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,15,0.008938666433095932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,15,0.00902399979531765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,31,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,31,0.00898133342464765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,63,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,63,0.00901333304742972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,127,0.009514666472872099
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,127,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,255,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,255,0.008943999807039896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,511,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,511,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,1023,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,1023,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,2047,0.012954667210578918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,32767,0.033733333150545754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,2047,0.012960000584522883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,4095,0.013210666676362356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,4095,0.013002666334311167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,8191,0.015295999745527903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,16383,0.03298133363326391
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,16383,0.017290666699409485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,32767,0.03362133353948593
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,float16,32767,0.05099200208981832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,1,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,1,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,3,0.00901333304742972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,3,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,7,0.008901333436369896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,15,0.00902399979531765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,15,0.009493333597977957
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,2,128,1,float16,float16,8191,0.017162666966517765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,31,0.008842666943868002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,31,0.010458666831254959
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,63,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,63,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,127,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,127,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,255,0.008842666943868002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,511,0.010751999914646149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,511,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,7,0.009029333169261614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,1023,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,1023,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,2047,0.012629333883523941
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,2047,0.01179733375708262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,4095,0.013157332936922709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,4095,0.013072000195582708
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,8191,0.016858667135238647
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,4,128,1,float16,fp8,8191,0.014933332800865173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,16383,0.04764799773693085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,16383,0.03126399964094162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,32767,0.08430400490760803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,32767,0.04840533435344696
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,fp8,255,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,1,0.012847999731699625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,1,0.011989332735538483
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,3,0.012800000607967377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,7,0.012842666357755661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,3,0.012671999633312225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,24,8,128,1,float16,float16,8191,0.029887999097506206
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,7,0.012768000364303589
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,15,0.012986666212479273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,15,0.012144000579913458
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,31,0.012159999459981918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,31,0.01293333371480306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,63,0.013023999830087027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,63,0.01303999995191892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,127,0.012800000607967377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,255,0.012960000584522883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,255,0.01357866699496905
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,511,0.015178666760524115
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,511,0.014970666418472925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,1023,0.017162666966517765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,1023,0.031680000325044
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,127,0.012853333105643591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,float16,2047,0.04985600213209788
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,2,128,1,float16,fp8,2047,0.031727999448776245
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,1,0.013082666943470636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,1,0.015226667126019796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,3,0.014655999839305878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,7,0.013013333082199097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,7,0.013440000514189402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,3,0.013295999417702356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,15,0.013786666095256805
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,15,0.013221333424250284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,31,0.013306666165590286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,63,0.014458666245142618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,63,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,127,0.014890667051076889
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,127,0.01303999995191892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,255,0.01423466702302297
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,255,0.013264000415802002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,511,0.02993600070476532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,511,0.017114666601022083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,31,0.014069333672523499
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,1023,0.05021866659323374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,2047,0.049925332268079124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,float16,2047,0.08648000160853068
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,1,0.01682666689157486
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,3,0.01732800031701724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,1,0.019066666563351948
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,3,0.016997333616018295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,7,0.01699200024207433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,7,0.018309333672126133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,4,128,1,float16,fp8,1023,0.03126933425664902
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,15,0.016997333616018295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,15,0.0186666672428449
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,31,0.019173332800467808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,31,0.01701333373785019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,63,0.018901333212852478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,63,0.017263999829689663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,127,0.019050666441520054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,127,0.016965333372354507
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,255,0.029893333713213604
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,255,0.017290666699409485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,511,0.04877866804599762
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,511,0.0314026673634847
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,1023,0.04985066751639048
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,fp8,2047,0.08461333314577739
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,2047,0.15584533413251242
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,1,0.015247999380032221
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,24,8,128,1,float16,float16,1023,0.08480000495910645
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,1,0.014869333555301031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,3,0.015157333264748255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,3,0.01551466683546702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,7,0.015018666783968607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,7,0.014853333433469137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,15,0.01526933287580808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,15,0.015226667126019796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,31,0.015024000157912573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,31,0.01492799942692121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,63,0.01524266724785169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,63,0.016496000190575916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,127,0.014943999548753103
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,127,0.01504533365368843
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,255,0.015168000012636185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,float16,511,0.03155199935038885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,511,0.019194666296243668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,1,0.018992000569899876
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,1,0.016986666868130367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,3,0.018805333723624546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,3,0.01706133286158244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,2,128,1,float16,fp8,255,0.014938666174809137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,7,0.017973333597183228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,7,0.016970666746298473
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,15,0.018895999838908512
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,15,0.017450666675964992
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,31,0.019130667050679524
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,31,0.016927999754746754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,63,0.019167999426523846
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,63,0.018186666071414948
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,127,0.018746666610240936
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,127,0.016986666868130367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,255,0.0315786674618721
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,255,0.017322666943073273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,float16,511,0.04862933357556661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,4,128,1,float16,fp8,511,0.031712000568707786
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,1,0.027514666318893433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,3,0.027285332481066387
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,3,0.02516266703605652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,7,0.02756800005833308
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,1,0.025605333348115284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,7,0.02492266645034154
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,15,0.027637332677841187
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,31,0.027295999228954315
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,31,0.025173333783944447
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,15,0.02516799916823705
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,63,0.025263999899228413
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,63,0.027866666515668232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,127,0.03356799980004629
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,127,0.025424001117547352
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,255,0.04876266419887543
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,255,0.03209600100914637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,float16,511,0.08476266264915466
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,24,8,128,1,float16,fp8,511,0.050842667619387306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,float16,1,0.021295999487241108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,fp8,1,0.01937066639463107
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,float16,3,0.021418665846188862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,float16,7,0.02128000060717265
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,float16,15,0.02143466720978419
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,fp8,7,0.019274666905403137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,fp8,3,0.019402666638294857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,fp8,15,0.019343999524911244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,float16,31,0.021418665846188862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,fp8,31,0.01922133316596349
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,fp8,63,0.019109333554903667
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,float16,127,0.021370666722456615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,fp8,127,0.01894933357834816
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,float16,63,0.021568000316619873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,float16,255,0.031119999786218006
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,float16,1,0.02739199995994568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,2,128,1,float16,fp8,255,0.019424000134070713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,float16,3,0.027664000789324444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,fp8,3,0.025583999852339428
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,fp8,1,0.025616000096003216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,float16,7,0.027376001079877216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,fp8,7,0.02532800038655599
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,float16,15,0.02757866680622101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,float16,31,0.027834666272004444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,fp8,31,0.025594666600227356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,fp8,15,0.025765334566434223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,float16,63,0.027269333600997925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,fp8,63,0.027098665634791057
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,float16,127,0.03542399903138479
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,float16,255,0.05117333432038625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,fp8,255,0.03579200059175491
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,float16,1,0.04794133206208547
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,4,128,1,float16,fp8,127,0.026778665681680042
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,fp8,1,0.03977066775163015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,fp8,3,0.03990400085846583
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,float16,3,0.04914666712284088
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,fp8,7,0.039477333426475525
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,float16,15,0.04875733455022176
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,fp8,15,0.04010133445262909
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,float16,7,0.04961599906285604
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,float16,31,0.048672000567118325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,fp8,31,0.03988266736268997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,float16,63,0.049839998284975685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,float16,127,0.05431999762852987
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,fp8,63,0.041706666350364685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,fp8,127,0.04470933477083842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,1,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,3,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,1,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,3,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,7,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,15,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,7,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,15,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,31,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,31,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,63,0.01027199998497963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,63,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,127,0.010656000425418219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,127,0.009290666629870733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,255,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,255,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,511,0.011354666203260422
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,511,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,1023,0.011488000551859537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,1023,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,2047,0.013295999417702356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,4095,0.015482666591803232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,4095,0.015135999768972397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,float16,255,0.08896000186602275
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,8191,0.017514667163292568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,16383,0.03324266771475474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,8191,0.016864000509182613
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,16383,0.019274666905403137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,float16,32767,0.051962668697039284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,32767,0.035461333890755974
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,1,0.009322666873534521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,1,0.009829333052039146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,24,8,128,1,float16,fp8,255,0.05560000240802765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,3,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,3,0.011450666934251785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,7,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,2,128,1,float16,fp8,2047,0.013167999684810638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,15,0.010538666198650995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,7,0.009434666484594345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,31,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,31,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,63,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,63,0.00949866697192192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,127,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,127,0.008901333436369896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,255,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,255,0.008954666554927826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,511,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,511,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,1023,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,1023,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,2047,0.012991999586423239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,2047,0.01257066677014033
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,4095,0.013178666432698568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,8191,0.0296426663796107
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,15,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,8191,0.015594666202863058
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,16383,0.031199999153614044
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,32767,0.08384000261624654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,1,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,fp8,32767,0.05293866495291392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,3,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,4095,0.01498666654030482
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,3,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,7,0.00903466654320558
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,7,0.010586666564146677
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,15,0.008874666566650072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,15,0.010650667051474253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,31,0.009679999823371569
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,4,128,1,float16,float16,16383,0.047930667797724404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,31,0.010703999549150467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,63,0.010037333394090334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,63,0.00973866693675518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,127,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,127,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,255,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,255,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,511,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,1,0.00978133330742518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,511,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,1023,0.012794667234023413
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,1023,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,2047,0.012826666235923767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,2047,0.013077333569526672
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,4095,0.030000001192092896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,4095,0.01575999955336253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,8191,0.03160533308982849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,8191,0.04814933240413666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,16383,0.048885335524876915
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,16383,0.08435199658075969
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,fp8,32767,0.08448533217112224
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,24,8,128,1,float16,float16,32767,0.15622933705647787
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,float16,1,0.035189333061377205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,float16,3,0.03340800106525421
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,fp8,3,0.029525332152843475
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,float16,7,0.03384000062942505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,fp8,7,0.02956266701221466
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,float16,15,0.03563733398914337
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,fp8,1,0.029994666576385498
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,fp8,15,0.02975466599067052
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,float16,31,0.034048000971476235
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,fp8,31,0.029418667157491047
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,float16,63,0.036042665441830955
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,fp8,63,0.02932800104220708
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,float16,127,0.0393653338154157
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,2,128,1,float16,fp8,127,0.029792000850041706
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,float16,1,0.050704002380371094
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,fp8,1,0.041840001940727234
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,float16,3,0.0498879998922348
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,fp8,3,0.04247466723124186
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,fp8,7,0.04190933207670847
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,float16,7,0.05097066859404246
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,float16,15,0.05062933266162872
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,fp8,15,0.04387733340263367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,float16,31,0.050399998823801674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,float16,63,0.05150933563709259
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,fp8,63,0.043247997760772705
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,fp8,31,0.043920000394185386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,float16,127,0.05806933343410492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,float16,1,0.08541333675384521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,4,128,1,float16,fp8,127,0.04773866633574168
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,fp8,1,0.07473599910736084
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,float16,3,0.08462400237719218
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,float16,7,0.0846560001373291
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,fp8,3,0.07459199925263722
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,fp8,7,0.07454933226108551
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,float16,15,0.08489066362380981
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,fp8,15,0.0743146687746048
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,float16,31,0.08596799770991008
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,fp8,31,0.07454399764537811
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,float16,63,0.08528533577919006
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,fp8,63,0.07461866736412048
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,float16,127,0.09868266185124715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,24,8,128,1,float16,fp8,127,0.07839466631412506
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,float16,1,0.062090665102005005
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,fp8,1,0.04976533353328705
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,float16,3,0.062314664324124656
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,float16,7,0.062047998110453285
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,fp8,7,0.04971200227737427
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,float16,15,0.06268799801667531
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,fp8,3,0.05012266834576925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,fp8,15,0.049685334165891014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,fp8,31,0.05008533100287119
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,float16,31,0.062128002444903054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,fp8,63,0.04981866478919983
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,2,128,1,float16,float16,63,0.06273599962393443
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,float16,1,0.08919999996821086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,fp8,1,0.08026133477687836
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,float16,3,0.09145599603652954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,fp8,3,0.08122133215268452
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,float16,7,0.08913600444793701
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,fp8,7,0.08066133161385854
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,fp8,15,0.08134933312733968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,float16,15,0.08947199583053589
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,fp8,31,0.08070399860541026
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,float16,31,0.09057600299517314
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,float16,1,0.1588479975859324
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,fp8,63,0.08091733356316884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,4,128,1,float16,float16,63,0.09008000294367473
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,fp8,1,0.1379039982954661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,float16,3,0.15914133191108704
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,fp8,3,0.1378773351510366
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,float16,7,0.1601599951585134
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,fp8,7,0.13852799932161966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,float16,15,0.1591200033823649
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,fp8,15,0.1381173332532247
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,float16,31,0.16053332885106406
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,1,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,1,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,3,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,3,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,7,0.009535999968647957
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,7,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,15,0.009690666571259499
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,15,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,31,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,31,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,63,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,63,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,fp8,31,0.1381600002447764
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,127,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,127,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,255,0.009402666861812273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,255,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,511,0.011205332974592844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,511,0.011365332951148352
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,1023,0.012335999558369318
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,1023,0.011370666325092316
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,2047,0.013258667041858038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,2047,0.013178666432698568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,4095,0.015018666783968607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,4095,0.01498666654030482
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,8191,0.03176533430814743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,8191,0.017498667041460674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,float16,16383,0.0498879998922348
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,2,128,1,float16,fp8,16383,0.03209600100914637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,1,0.010645333677530289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,1,0.010581333190202713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,3,0.010368000095089277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,3,0.010351999973257383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,7,0.009312000125646591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,7,0.011274666835864386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,15,0.009466666728258133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,15,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,31,0.009626666704813639
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,31,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,63,0.009679999823371569
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,63,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,fp8,63,0.13780799508094788
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,127,0.010586666564146677
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,24,8,128,1,float16,float16,63,0.1590666671593984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,127,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,255,0.010591999938090643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,511,0.011146667102972666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,1023,0.012954667210578918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,1023,0.012831999609867731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,255,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,2047,0.014874666929244995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,511,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,2047,0.013552000125249227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,4095,0.01565333331624667
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,8191,0.04808000226815542
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,8191,0.032261334359645844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,16383,0.08455466230710347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,fp8,16383,0.04944533109664917
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,1,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,1,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,3,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,3,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,7,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,7,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,15,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,15,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,31,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,31,0.01116266722480456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,63,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,63,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,127,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,127,0.01119999960064888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,255,0.010709332923094431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,4,128,1,float16,float16,4095,0.030000001192092896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,255,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,511,0.011146667102972666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,511,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,1023,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,2047,0.01516266663869222
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,2047,0.029333333174387615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,4095,0.029909332593282063
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,4095,0.04808533191680908
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,8191,0.04834666848182678
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,8191,0.08264000217119853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,fp8,16383,0.08401067058245341
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,16383,0.15529599785804749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,1,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,1,0.01126933346192042
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,3,0.009872000043590864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,3,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,7,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,15,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,7,0.009973333527644476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,15,0.010640000303586325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,31,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,63,0.009477333476146063
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,31,0.010010666524370512
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,63,0.010751999914646149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,127,0.009461333354314169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,255,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,127,0.010117333382368088
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,255,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,511,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,511,0.011535999675591787
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,1023,0.011941333611806234
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,1023,0.011317333827416102
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,2047,0.013050666699806849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,4095,0.015130666395028433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,24,8,128,1,float16,float16,1023,0.013365333278973898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,4095,0.014981333166360855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,8191,0.03129599988460541
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,8191,0.017360000560681026
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,16383,0.05004266897837321
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,fp8,16383,0.03183999905983607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,1,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,1,0.009322666873534521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,3,0.008965333302815756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,3,0.010650667051474253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,7,0.009029333169261614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,15,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,15,0.008986666798591614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,31,0.008896000062425932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,31,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,1,128,1,float16,float16,2047,0.01303999995191892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,63,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,127,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,127,0.010559999694426855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,255,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,255,0.010410666465759277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,511,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,511,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,7,0.009914666414260864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,1023,0.012890666723251343
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,1023,0.01293333371480306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,2047,0.01312000056107839
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,2047,0.01479999969402949
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,4095,0.0308746670683225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,4095,0.01525866612792015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,8191,0.0492799977461497
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,8191,0.03253866732120514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,63,0.00895999992887179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,fp8,16383,0.05023466547330221
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,1,0.009589333087205887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,1,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,3,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,3,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,7,0.010661333799362183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,7,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,15,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,15,0.010410666465759277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,31,0.010421333213647207
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,31,0.010703999549150467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,63,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,63,0.0106133334338665
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,127,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,127,0.010538666198650995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,255,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,255,0.010490667074918747
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,511,0.011152000476916632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,511,0.011312000453472137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,2,128,1,float16,float16,16383,0.08538132905960083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,1023,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,1023,0.013013333082199097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,2047,0.02985599885384242
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,2047,0.014815999815861383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,4095,0.04765866696834564
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,4095,0.030991998811562855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,8191,0.04814933240413666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,8191,0.08373333017031352
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,float16,16383,0.15526400009791055
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,4,128,1,float16,fp8,16383,0.08569600184758504
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,1,0.011226666470368704
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,1,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,3,0.011242666592200598
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,3,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,7,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,7,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,15,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,15,0.011242666592200598
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,31,0.011183999478816986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,31,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,63,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,63,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,127,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,127,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,255,0.011215999722480774
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,255,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,511,0.013552000125249227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,511,0.01292266696691513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,1023,0.029546665648619335
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,1023,0.01532799998919169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,2047,0.03065066784620285
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,2047,0.048197334011395775
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,4095,0.08437333504358928
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,4095,0.04894933104515076
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,8191,0.1564853290716807
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,8191,0.08539733290672302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,1,0.009242666885256767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,float16,16383,0.3011946678161621
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,1,0.009509333098928133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,3,0.009253333633144697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,3,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,7,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,15,0.009594666461149851
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,31,0.00927466650803884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,15,0.011231999844312668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,31,0.010362666721145311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,63,0.010005333150426546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,63,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,127,0.00927466650803884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,127,0.011530666301647821
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,255,0.009338666374484697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,255,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,511,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,1023,0.010709332923094431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,511,0.011509332805871964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,1023,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,2047,0.011653333902359009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,2047,0.011125333607196808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,4095,0.011823999385039011
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,4095,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,8191,0.015157333264748255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,8191,0.015856000284353893
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,16383,0.019066666563351948
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,16,8,128,1,float16,fp8,16383,0.1585546632607778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,32767,0.025392000873883564
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,7,0.008933333059151968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,32767,0.025370667378107708
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,float16,65535,0.029658667743206024
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,65535,0.03143466760714849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,1,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,1,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,3,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,7,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,7,0.008976000050703684
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,15,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,15,0.009008000294367472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,31,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,31,0.008890666688481966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,63,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,63,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,127,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,127,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,255,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,255,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,511,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,511,0.010751999914646149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,1023,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,1023,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,2047,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,2047,0.010629333555698395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,4095,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,1,128,1,float16,fp8,16383,0.019424000134070713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,4095,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,8191,0.01488000030318896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,8191,0.014901333798964819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,16383,0.016997333616018295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,16383,0.019194666296243668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,32767,0.019333332777023315
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,32767,0.021397332350413006
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,3,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,fp8,65535,0.022650666534900665
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,1,0.010213333492477735
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,1,0.009589333087205887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,3,0.00877333308259646
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,3,0.009509333098928133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,7,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,7,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,15,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,15,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,31,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,63,0.010181333248813948
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,63,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,127,0.010480000327030817
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,127,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,255,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,255,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,511,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,511,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,1023,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,1023,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,31,0.009850666547815004
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,2047,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,4095,0.011674666156371435
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,4095,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,8191,0.015466666469971338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,8191,0.015477333217859268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,16383,0.017674667139848072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,32767,0.019567999988794327
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,32767,0.019274666905403137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,65535,0.036714665591716766
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,2,128,1,float16,float16,65535,0.023408000667889912
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,65535,0.02125866711139679
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,1,0.009253333633144697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,1,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,3,0.009312000125646591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,3,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,7,0.009301333377758661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,fp8,2047,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,15,0.009338666374484697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,15,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,31,0.009679999823371569
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,31,0.010186666622757912
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,4,128,1,float16,float16,16383,0.01754666616519292
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,63,0.009370666618148485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,127,0.009290666629870733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,127,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,255,0.009312000125646591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,255,0.009653333574533463
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,511,0.01051733394463857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,511,0.01139733319481214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,1023,0.010751999914646149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,1023,0.011434666812419891
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,2047,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,2047,0.011674666156371435
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,4095,0.013189333180586496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,4095,0.01332266628742218
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,7,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,8191,0.01504533365368843
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,16383,0.01724799970785777
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,16383,0.01692266638080279
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,32767,0.018245333184798557
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,32767,0.03372266640265783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,63,0.008949333180983862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,1,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,65535,0.03425599883000056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,float16,65535,0.05211733281612396
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,1,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,3,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,3,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,7,0.008874666566650072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,15,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,15,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,31,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,31,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,63,0.009002666920423508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,63,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,127,0.00933333362142245
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,127,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,255,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,255,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,511,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,1023,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,1023,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,16,8,128,1,float16,fp8,8191,0.015237333873907724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,2047,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,2047,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,4095,0.011333333949247995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,4095,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,8191,0.015226667126019796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,8191,0.014917333920796713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,16383,0.019167999426523846
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,16383,0.01899733394384384
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,32767,0.021407999098300934
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,32767,0.02102400114138921
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,float16,65535,0.023706667125225067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,7,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,1,0.009285333255926767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,1,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,3,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,3,0.01062400018175443
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,7,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,7,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,15,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,15,0.009642666826645533
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,31,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,511,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,31,0.009306666751702627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,63,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,63,0.009509333098928133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,127,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,127,0.00933333362142245
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,255,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,511,0.010575999816258749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,511,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,1023,0.009946666657924652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,1023,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,1,128,1,float16,fp8,65535,0.02316266546646754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,2047,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,2047,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,4095,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,8191,0.014885333677132925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,8191,0.015130666395028433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,16383,0.01718933383623759
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,255,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,16383,0.01714666684468587
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,32767,0.019109333554903667
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,65535,0.03575466573238373
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,fp8,65535,0.021344001094500225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,1,0.00897066667675972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,1,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,3,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,4095,0.011296000331640244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,3,0.008949333180983862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,7,0.009285333255926767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,7,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,15,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,15,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,31,0.009375999992092451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,63,0.009423999736706415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,63,0.010640000303586325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,127,0.009418666362762451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,255,0.009408000235756239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,255,0.010453333457310995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,511,0.011306667079528173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,511,0.011231999844312668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,1023,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,1023,0.011141333729028702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,2047,0.011333333949247995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,2047,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,31,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,4095,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,4095,0.013269333789745966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,8191,0.015077333897352219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,8191,0.015040000279744467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,127,0.009893333539366722
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,16383,0.017605333278576534
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,16383,0.016688000410795212
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,32767,0.033770665526390076
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,float16,65535,0.05150933563709259
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,65535,0.035205334424972534
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,1,0.00890666681031386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,1,0.010048000141978264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,3,0.008997333546479544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,3,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,7,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,7,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,15,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,15,0.010751999914646149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,31,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,31,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,63,0.008826666822036108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,63,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,127,0.009002666920423508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,2,128,1,float16,float16,32767,0.019098666807015736
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,127,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,255,0.008837333569924036
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,255,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,4,128,1,float16,fp8,32767,0.017504000415404636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,511,0.011205332974592844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,511,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,1023,0.010629333555698395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,1023,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,2047,0.013007999708255133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,2047,0.012682666381200155
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,4095,0.013418667018413544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,8191,0.016330666840076447
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,8191,0.016943999876578648
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,16383,0.03176533430814743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,16383,0.01740266631046931
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,32767,0.03398933261632919
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,65535,0.08591999610265096
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,65535,0.0517493337392807
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,1,0.009397333487868309
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,1,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,3,0.00902399979531765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,3,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,7,0.00927466650803884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,7,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,fp8,4095,0.013461332768201828
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,15,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,15,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,31,0.00979200005531311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,16,8,128,1,float16,float16,32767,0.04969066878159841
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,31,0.00983466642598311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,63,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,63,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,127,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,127,0.009866666669646898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,255,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,255,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,511,0.012586666891972223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,511,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,1023,0.013167999684810638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,1023,0.012778667112191519
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,2047,0.01505600040157636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,2047,0.014773332824309668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,4095,0.031445334355036415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,4095,0.017573333034912746
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,float16,8191,0.049829334020614624
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,1,0.010319999729593595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,1,128,1,float16,fp8,8191,0.03238933285077413
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,1,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,3,0.00983466642598311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,3,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,15,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,7,0.01128000020980835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,15,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,31,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,31,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,63,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,63,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,127,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,127,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,255,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,511,0.011317333827416102
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,7,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,511,0.011120000233252844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,1023,0.013434667140245438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,1023,0.01303999995191892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,2047,0.03036266565322876
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,2047,0.015040000279744467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,4095,0.048885335524876915
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,4095,0.03143466760714849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,fp8,8191,0.04897066454092661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,8191,0.08620799581209819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,1,0.011322667201360067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,2,128,1,float16,float16,255,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,1,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,3,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,3,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,7,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,7,0.01129066695769628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,15,0.01181866725285848
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,15,0.01139733319481214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,31,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,31,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,63,0.01166933278242747
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,63,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,127,0.011242666592200598
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,127,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,255,0.01137599969903628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,255,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,511,0.013088000317414602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,511,0.013088000317414602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,1023,0.030261332790056866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,2047,0.04800533254941305
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,4095,0.04982399940490723
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,4095,0.08495466907819112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,float16,8191,0.15680000185966492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,8191,0.08634666601816814
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,1,0.01328533391157786
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,1,0.013199999928474426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,2047,0.03143466760714849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,4,128,1,float16,fp8,1023,0.015578666081031164
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,3,0.01303999995191892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,3,0.01332266628742218
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,7,0.012986666212479273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,15,0.013141332815090815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,7,0.01320533330241839
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,15,0.013290667285521826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,31,0.012826666235923767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,31,0.013194666554530462
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,63,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,127,0.012869333227475485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,127,0.012858666479587555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,255,0.013440000514189402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,255,0.013125333935022354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,511,0.029781334102153778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,511,0.01693333312869072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,1023,0.04905599852403005
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,1023,0.031178665657838184
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,2047,0.05004799862702688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,63,0.014773332824309668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,2047,0.08487466971079509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,4095,0.08589866757392883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,1,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,8191,0.30560533205668133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,1,0.00879466657837232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,fp8,8191,0.1586133340994517
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,3,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,3,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,7,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,7,0.009338666374484697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,15,0.008805333326260248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,31,0.009418666362762451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,31,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,63,0.010474666953086853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,63,0.00878399983048439
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,127,0.010735999792814255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,127,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,255,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,255,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,511,0.011391999820868174
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,16,8,128,1,float16,float16,4095,0.15853333473205566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,511,0.011695999652147293
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,1023,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,1023,0.011498666057984034
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,2047,0.011306667079528173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,4095,0.011519999553759893
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,15,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,4095,0.011418666690587997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,8191,0.015423999478419622
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,8191,0.01720533271630605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,16383,0.018005333840847015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,16383,0.018640000373125076
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,32767,0.021162666380405426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,32767,0.020794666061798733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,65535,0.03612799942493439
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,fp8,65535,0.02348800003528595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,1,0.009632000078757605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,1,0.009354666496316591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,3,0.009818666925032934
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,7,0.009786666681369146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,3,0.00983466642598311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,7,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,15,0.009328000247478485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,15,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,31,0.009546666716535887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,31,0.00979200005531311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,1,128,1,float16,float16,2047,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,63,0.009786666681369146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,63,0.00943999985853831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,127,0.009568000212311745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,127,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,255,0.009759999811649323
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,255,0.009914666414260864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,511,0.011274666835864386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,511,0.011173332730929056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,1023,0.01118933285276095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,2047,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,4095,0.012954667210578918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,4095,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,8191,0.015013333410024643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,8191,0.015034666905800501
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,16383,0.01695999999841054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,32767,0.03150933235883713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,32767,0.01752000053723653
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,65535,0.050479998191197716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,65535,0.033386667569478355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,1,0.008997333546479544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,1,0.009008000294367472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,3,0.00879466657837232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,3,0.00890666681031386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,1023,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,7,0.008976000050703684
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,7,0.009952000031868616
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,15,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,15,0.010101333260536194
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,fp8,16383,0.015008000036080679
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,31,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,31,0.009568000212311745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,63,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,63,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,127,0.009413333609700203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,255,0.008837333569924036
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,255,0.010725333044926325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,511,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,511,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,1023,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,1023,0.010666667173306147
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,2047,0.01292266696691513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,2047,0.013002666334311167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,4095,0.014666666587193808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,4095,0.014474666366974512
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,8191,0.015386667102575302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,2,128,1,float16,float16,2047,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,8191,0.01525866612792015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,16383,0.031727999448776245
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,16383,0.01739199956258138
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,32767,0.05044800043106079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,32767,0.03335466732581457
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,float16,65535,0.08633066217104594
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,65535,0.05205333232879639
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,1,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,1,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,3,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,3,0.009370666618148485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,4,128,1,float16,fp8,127,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,7,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,7,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,15,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,31,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,63,0.008799999952316284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,63,0.009290666629870733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,127,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,127,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,255,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,255,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,511,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,511,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,1023,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,1023,0.011215999722480774
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,2047,0.013034666577974955
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,2047,0.012997332960367203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,15,0.009349333122372627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,31,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,4095,0.015018666783968607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,4095,0.01357866699496905
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,8191,0.015930666277805965
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,16383,0.04771199822425842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,16383,0.03134933362404505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,32767,0.04846400022506714
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,32767,0.08396800359090169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,fp8,65535,0.08547199765841167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,1,0.010591999938090643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,1,0.01116266722480456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,8191,0.02975466599067052
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,3,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,3,0.011525332927703857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,7,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,16,8,128,1,float16,float16,65535,0.15576533476511636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,7,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,15,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,31,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,15,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,31,0.011109333485364914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,63,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,63,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,127,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,127,0.011173332730929056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,255,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,255,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,511,0.013056000073750814
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,1023,0.01314666618903478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,1023,0.013007999708255133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,2047,0.030218665798505146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,2047,0.0169813334941864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,float16,4095,0.04941866795221964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,1,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,4095,0.03156800071398417
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,1,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,3,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,7,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,3,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,7,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,15,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,15,0.011247999966144562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,31,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,31,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,1,128,1,float16,fp8,511,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,63,0.011445333560307821
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,63,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,127,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,127,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,255,0.012175999581813812
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,255,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,511,0.01302933320403099
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,511,0.013359999905029932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,1023,0.031221332649389904
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,1023,0.01533866673707962
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,2047,0.04858666658401489
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,2047,0.03164266546567281
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,float16,4095,0.08662399649620056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,2,128,1,float16,fp8,4095,0.049733335773150124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,1,0.013066666821638743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,3,0.015087999403476715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,3,0.012901333471139273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,7,0.012928000340859095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,7,0.013440000514189402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,15,0.013232000172138214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,15,0.013056000073750814
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,31,0.015098666151364645
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,31,0.012944000462690989
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,63,0.014085333794355392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,1,0.013045333325862885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,63,0.013354666531085968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,127,0.013994666437307993
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,127,0.012917333592971167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,255,0.013199999928474426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,511,0.030954666435718536
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,511,0.017157333592573803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,255,0.014906667172908783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,1023,0.03202133377393087
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,1023,0.04976533353328705
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,2047,0.0865226686000824
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,2047,0.05049600203831991
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,1,0.018522666146357853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,float16,4095,0.1593119998772939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,4,128,1,float16,fp8,4095,0.08762666583061218
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,1,0.01706133286158244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,3,0.01833600054184596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,3,0.016805333395799
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,7,0.0184906671444575
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,15,0.01921066641807556
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,15,0.01720533271630605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,7,0.017450666675964992
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,31,0.018874666343132656
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,31,0.017050666113694508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,63,0.017375999440749485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,63,0.017317333569129307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,127,0.01915733392039935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,127,0.017194667210181553
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,255,0.02997333308060964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,255,0.01728533332546552
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,511,0.047797332207361855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,1023,0.0846560001373291
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,511,0.03148266673088074
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,1023,0.04880533119042715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,2047,0.15516799688339233
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,2047,0.08507733543713887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,float16,4095,0.29656533400217694
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,1,0.01292266696691513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,16,8,128,1,float16,fp8,4095,0.15652799606323242
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,1,0.013002666334311167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,3,0.012821332861979803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,3,0.01312000056107839
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,7,0.01292266696691513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,7,0.013034666577974955
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,15,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,15,0.01301866645614306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,31,0.013013333082199097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,63,0.01310933381319046
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,63,0.01462399959564209
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,127,0.012698666503032049
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,127,0.012869333227475485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,255,0.012725333372751871
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,31,0.012917333592971167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,255,0.012821332861979803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,511,0.015194666882356008
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,511,0.015141333142916361
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,fp8,1023,0.01704000060757001
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,1,0.014666666587193808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,1,0.013173333058754602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,3,0.013807999591032663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,3,0.012874666601419449
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,1,128,1,float16,float16,1023,0.03092266619205475
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,7,0.01440000037352244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,7,0.013157332936922709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,15,0.014864000181357065
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,31,0.014111999422311783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,15,0.01313599944114685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,31,0.012975999464591345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,63,0.01360000049074491
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,63,0.013290667285521826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,127,0.012853333105643591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,255,0.0129120002190272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,255,0.013605333864688873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,127,0.015279999623696009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,511,0.03143466760714849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,511,0.017024000485738117
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,float16,1023,0.04965866605440775
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,1,0.017338667064905167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,1,0.01692266638080279
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,3,0.018746666610240936
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,2,128,1,float16,fp8,1023,0.03266133368015289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,7,0.018895999838908512
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,7,0.01708799973130226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,15,0.016901332885026932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,3,0.016927999754746754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,31,0.018954666952292126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,15,0.01897066707412402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,31,0.016901332885026932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,63,0.018629333625237148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,63,0.017050666113694508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,127,0.019120000302791595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,127,0.0170666662355264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,255,0.030837332208951313
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,255,0.017114666601022083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,511,0.03173866619666418
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,1023,0.08409600456555684
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,float16,511,0.04955733319123586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,1,0.027237333357334137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,1,0.02493866781393687
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,3,0.027450665831565857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,3,0.02420799930890401
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,4,128,1,float16,fp8,1023,0.05037866532802582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,7,0.027301333844661713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,7,0.025674665967623394
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,15,0.027232001225153606
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,15,0.025093334416548412
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,31,0.027386667827765148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,31,0.025093334416548412
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,63,0.027215999861558277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,63,0.025125332176685333
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,127,0.031632001201311745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,127,0.02497066557407379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,255,0.04807466765244802
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,255,0.033215999603271484
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,511,0.08400000135103862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,float16,1023,0.15577066938082376
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,511,0.05022400120894114
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,16,8,128,1,float16,fp8,1023,0.08619733651479085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,fp8,1,0.015077333897352219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,float16,1,0.015077333897352219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,fp8,3,0.01516266663869222
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,float16,3,0.015557333827018738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,float16,7,0.015216000378131866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,fp8,7,0.015066667149464289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,float16,15,0.015392000476519266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,fp8,15,0.01481066644191742
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,float16,31,0.015189333508412043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,fp8,31,0.015072000523408255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,float16,63,0.01532799998919169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,fp8,63,0.015157333264748255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,float16,127,0.015087999403476715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,fp8,127,0.015087999403476715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,fp8,255,0.014949332922697067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,float16,255,0.015365333606799444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,float16,511,0.03139200061559677
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,1,128,1,float16,fp8,511,0.019002666076024372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,float16,1,0.019039999693632126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,fp8,1,0.017231999586025875
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,float16,3,0.018698666244745255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,fp8,3,0.01728533332546552
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,float16,7,0.01899733394384384
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,float16,15,0.0191040001809597
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,fp8,7,0.01740266631046931
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,fp8,15,0.01741333305835724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,float16,31,0.01926933353145917
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,fp8,31,0.017114666601022083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,float16,63,0.018992000569899876
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,fp8,63,0.017290666699409485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,float16,127,0.019130667050679524
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,fp8,127,0.016997333616018295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,float16,255,0.03159466634194056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,fp8,255,0.01721599946419398
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,float16,511,0.04996266464392344
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,2,128,1,float16,fp8,511,0.031850665807724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,float16,1,0.029205332199732464
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,fp8,1,0.025434667865435284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,float16,3,0.028005334238211315
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,fp8,3,0.025226667523384094
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,float16,7,0.027477333943049114
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,fp8,7,0.02518400053183238
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,float16,15,0.027509334186712902
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,fp8,15,0.025429333249727886
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,float16,31,0.028362666567166645
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,fp8,31,0.025568000972270966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,float16,63,0.02755733331044515
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,fp8,63,0.02553066611289978
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,float16,127,0.035029334326585136
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,fp8,127,0.025594666600227356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,float16,255,0.05072533090909322
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,fp8,255,0.03551466763019562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,float16,511,0.08732799688975017
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,4,128,1,float16,fp8,511,0.05374933282534281
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,float16,1,0.047594666481018066
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,fp8,1,0.03963200002908707
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,float16,7,0.04769066472848257
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,float16,3,0.048010667165120445
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,fp8,3,0.03992533435424169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,fp8,7,0.039503999054431915
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,float16,15,0.04805333415667216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,fp8,15,0.041402667760849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,float16,31,0.047983999053637184
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,fp8,31,0.0400693342089653
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,float16,63,0.0480373352766037
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,fp8,63,0.04049599915742874
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,fp8,127,0.044250667095184326
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,float16,127,0.05526400109132131
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,float16,255,0.08729066451390584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,1,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,fp8,255,0.05621333420276642
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,3,0.010677333921194077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,3,0.010703999549150467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,7,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,7,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,float16,511,0.1588746706644694
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,16,8,128,1,float16,fp8,511,0.08916266759236653
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,15,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,31,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,63,0.009472000102202097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,63,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,1,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,127,0.00943999985853831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,127,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,15,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,255,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,255,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,31,0.01055466632048289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,511,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,511,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,1023,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,1023,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,2047,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,2047,0.011328000575304031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,4095,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,4095,0.015034666905800501
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,8191,0.01598400001724561
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,8191,0.01699200024207433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,16383,0.019120000302791595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,16383,0.017525333911180496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,32767,0.0335359995563825
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,32767,0.020949333906173706
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,float16,65535,0.0518506666024526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,1,0.009343999748428663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,3,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,3,0.00902399979531765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,7,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,7,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,15,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,15,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,31,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,31,0.010138666878143946
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,63,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,63,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,127,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,127,0.010037333394090334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,1,128,1,float16,fp8,65535,0.03561066587766012
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,255,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,1,0.010709332923094431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,255,0.009328000247478485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,511,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,511,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,1023,0.011231999844312668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,1023,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,2047,0.013397333522637686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,2047,0.012991999586423239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,4095,0.013093333691358566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,4095,0.01394133393963178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,8191,0.016634666671355564
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,16383,0.03159466634194056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,8191,0.01526933287580808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,16383,0.017743999759356182
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,32767,0.050479998191197716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,32767,0.03358400116364161
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,fp8,65535,0.05106133222579956
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,2,128,1,float16,float16,65535,0.09011733531951904
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,1,0.008752000207702318
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,1,0.00949866697192192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,3,0.008805333326260248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,7,0.00927466650803884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,3,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,15,0.009392000113924345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,15,0.01027199998497963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,31,0.009941333283980688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,31,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,63,0.009290666629870733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,127,0.00879466657837232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,127,0.00979200005531311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,255,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,255,0.010480000327030817
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,7,0.01044800008336703
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,511,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,1023,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,63,0.010186666622757912
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,2047,0.012960000584522883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,4095,0.014757333944241205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,4095,0.013050666699806849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,8191,0.02980799973011017
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,8191,0.01618133361140887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,16383,0.048453330993652344
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,16383,0.03147733211517334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,32767,0.08431466420491536
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,511,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,32767,0.04971200227737427
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,1023,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,float16,65535,0.15618667006492615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,65535,0.08702400326728821
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,1,0.010426666587591171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,1,0.010250666489203772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,3,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,3,0.008837333569924036
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,7,0.00890666681031386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,7,0.009637333452701569
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,15,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,15,0.009392000113924345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,31,0.009306666751702627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,31,0.009354666496316591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,63,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,63,0.009530666594703993
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,127,0.009306666751702627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,127,0.008938666433095932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,255,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,255,0.009519999846816063
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,511,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,511,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,1023,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,1023,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,2047,0.012965332716703415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,4095,0.029391999046007793
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,4095,0.01526933287580808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,4,128,1,float16,fp8,2047,0.012991999586423239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,8191,0.03137599925200144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,16383,0.048842668533325195
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,16383,0.08482666810353597
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,32767,0.085125337044398
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,65535,0.300650676091512
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,2047,0.01321600005030632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,fp8,65535,0.15825066963831583
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,8191,0.04779199759165446
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,16,8,128,1,float16,float16,32767,0.15557333827018738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,float16,1,0.02128533273935318
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,float16,3,0.021013334393501282
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,fp8,3,0.018917333334684372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,float16,7,0.021130666136741638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,fp8,7,0.019039999693632126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,float16,15,0.02147199958562851
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,fp8,1,0.019760000209013622
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,fp8,15,0.01913600042462349
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,float16,31,0.02102400114138921
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,fp8,31,0.01907733331123988
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,float16,63,0.021269333859284718
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,fp8,63,0.019440000255902607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,float16,127,0.021520001192887623
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,fp8,127,0.01958400011062622
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,fp8,255,0.019167999426523846
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,1,128,1,float16,float16,255,0.0315733328461647
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,float16,1,0.028592000404993694
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,fp8,1,0.027322667340437572
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,float16,3,0.028213332096735638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,float16,7,0.027232001225153606
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,fp8,3,0.026399999856948853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,fp8,7,0.02716800073782603
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,float16,15,0.02770666778087616
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,fp8,15,0.025626666843891144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,float16,31,0.028581333657105763
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,float16,63,0.02956266701221466
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,fp8,63,0.025253333151340485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,fp8,31,0.02720533311367035
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,float16,127,0.0359253336985906
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,fp8,127,0.025487999121348064
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,float16,255,0.05211733281612396
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,2,128,1,float16,fp8,255,0.03600533306598663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,float16,1,0.05009066561857859
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,fp8,1,0.04164800047874451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,float16,3,0.049866666396458946
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,fp8,3,0.041802664597829185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,float16,7,0.050111999114354454
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,fp8,7,0.04154133299986521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,float16,15,0.050160000721613564
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,fp8,15,0.04182399809360504
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,float16,31,0.050016000866889954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,fp8,31,0.0415040006240209
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,float16,63,0.050111999114354454
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,fp8,63,0.04213866591453552
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,float16,127,0.05810666580994924
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,fp8,127,0.0476800004641215
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,float16,255,0.09013866384824117
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,4,128,1,float16,fp8,255,0.05825066566467285
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,fp8,1,0.07444266478220622
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,float16,3,0.0846453309059143
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,float16,1,0.08693333466847737
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,fp8,3,0.07474133372306824
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,float16,7,0.08519466718037923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,fp8,7,0.07434666653474171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,float16,15,0.08656000097592671
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,fp8,15,0.07461333274841309
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,float16,31,0.08481066425641377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,fp8,31,0.0746506651242574
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,fp8,63,0.0746506651242574
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,float16,63,0.08636800448099773
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,float16,127,0.09685333569844563
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,fp8,127,0.07638933261235555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,float16,255,0.16289599736531576
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,16,8,128,1,float16,fp8,255,0.10001066327095032
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,float16,1,0.033973333736260734
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,fp8,1,0.02932800104220708
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,float16,3,0.03549333413441976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,float16,7,0.03364799916744232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,fp8,7,0.02958400050799052
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,fp8,3,0.02994133283694585
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,float16,15,0.03563733398914337
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,fp8,15,0.029626667499542236
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,fp8,31,0.029605334003766377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,float16,31,0.03508266558249792
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,float16,63,0.03506666670242945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,fp8,63,0.02942399928967158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,float16,1,0.051029334465662636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,float16,127,0.03818666686614355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,1,128,1,float16,fp8,127,0.029904000461101532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,fp8,1,0.0422986646493276
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,float16,3,0.05067733426888784
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,float16,7,0.05081599950790405
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,fp8,3,0.04346133271853129
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,fp8,7,0.04173333446184794
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,fp8,15,0.04156800111134847
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,fp8,31,0.04233066737651825
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,float16,15,0.05147199829419454
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,float16,63,0.05041066805521647
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,float16,31,0.051216001311937966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,fp8,63,0.041637333730856575
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,float16,127,0.059658666451772056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,2,128,1,float16,fp8,127,0.047824000318845115
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,float16,1,0.08947733044624329
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,fp8,1,0.07897066573301952
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,float16,3,0.08947733044624329
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,fp8,3,0.08073066671689351
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,float16,7,0.08988799651463826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,fp8,7,0.07965866724650066
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,float16,15,0.0900426705678304
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,fp8,15,0.08092799782752991
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,fp8,31,0.08001066744327545
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,float16,31,0.08974933624267578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,float16,63,0.08974400162696838
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,fp8,63,0.07978133360544841
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,float16,127,0.10410666465759277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,float16,1,0.15987199544906616
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,fp8,1,0.13690132896105447
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,float16,3,0.15846932927767435
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,4,128,1,float16,fp8,127,0.08310399949550629
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,fp8,3,0.13742400209108988
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,float16,7,0.158842662970225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,fp8,7,0.13713066776593527
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,fp8,15,0.13782933354377747
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,float16,31,0.15897066394488016
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,float16,15,0.1597866714000702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,fp8,31,0.13634666800498962
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,float16,63,0.16038399934768677
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,1,0.010351999973257383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,1,0.011221333096424738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,3,0.010351999973257383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,3,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,7,0.009626666704813639
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,7,0.010543999572594961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,15,0.009370666618148485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,15,0.010490667074918747
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,31,0.009434666484594345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,31,0.010656000425418219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,63,0.009695999945203463
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,63,0.010458666831254959
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,127,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,127,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,255,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,255,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,511,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,fp8,63,0.13610133528709412
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,511,0.01139733319481214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,1023,0.011125333607196808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,2047,0.013471999516089758
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,1023,0.011231999844312668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,2047,0.01340266689658165
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,4095,0.015189333508412043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,4095,0.015306666493415833
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,8191,0.017658667018016178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,8191,0.01708799973130226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,16383,0.03391999999682108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,16383,0.020245333512624104
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,float16,32767,0.05194666484991709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,1,128,1,float16,fp8,32767,0.03578133384386698
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,1,0.009242666885256767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,1,0.010597333312034607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,3,0.009232000137368837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,3,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,7,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,7,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,15,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,15,0.009296000003814697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,31,0.00902399979531765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,31,0.00938666673998038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,63,0.009408000235756239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,63,0.009328000247478485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,127,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,127,0.009541333342591921
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,255,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,255,0.00973866693675518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,511,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,511,0.011253333340088526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,1023,0.012784000486135483
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,1023,0.011343999455372492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,2047,0.013104000439246496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,2047,0.012655999511480331
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,4095,0.01333333303531011
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,8191,0.030426666140556335
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,8191,0.017018667111794155
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,float16,127,0.1833546757698059
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,16383,0.049322664737701416
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,16,8,128,1,float16,fp8,127,0.14166399836540222
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,16383,0.032287999987602234
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,fp8,32767,0.05022400120894114
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,1,0.010602666685978571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,4095,0.01515199989080429
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,3,0.010549332946538925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,3,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,2,128,1,float16,float16,32767,0.08636266986529033
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,7,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,7,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,15,0.009306666751702627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,15,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,1,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,31,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,31,0.009359999870260557
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,63,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,127,0.00961599995692571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,127,0.009525333220760027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,255,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,63,0.011370666325092316
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,255,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,511,0.011152000476916632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,511,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,1023,0.012768000364303589
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,1023,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,2047,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,2047,0.015077333897352219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,4095,0.015365333606799444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,4095,0.0308693324526151
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,8191,0.03148799886306127
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,16383,0.08429333567619324
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,16383,0.05076266825199127
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,32767,0.15812800327936807
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,fp8,32767,0.08562133709589641
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,1,0.011514666179815928
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,1,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,3,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,3,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,7,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,7,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,15,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,31,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,31,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,63,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,63,0.011141333729028702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,127,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,127,0.010512000570694605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,255,0.011381333072980246
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,255,0.010597333312034607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,511,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,15,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,511,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,1023,0.01301866645614306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,4,128,1,float16,float16,8191,0.04769066472848257
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,2047,0.029658667743206024
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,4095,0.04710400104522705
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,4095,0.03030399978160858
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,8191,0.048938666780789696
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,16383,0.15562132994333902
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,16383,0.08449066678682964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,1023,0.013525333255529404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,2047,0.015125333021084467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,1,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,fp8,32767,0.15613866845766702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,8191,0.08391466736793518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,1,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,7,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,3,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,3,0.01128000020980835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,7,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,15,0.00878399983048439
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,31,0.009413333609700203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,15,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,63,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,16,8,128,1,float16,float16,32767,0.29817066589991253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,63,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,127,0.009925333162148794
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,127,0.010597333312034607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,255,0.009754666437705358
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,511,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,511,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,1023,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,1023,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,2047,0.012837332983811697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,2047,0.013061333447694778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,4095,0.015029333531856537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,31,0.010735999792814255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,4095,0.014874666929244995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,8191,0.017071999609470367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,8191,0.031013332307338715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,16383,0.050479998191197716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,fp8,16383,0.0331839993596077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,1,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,1,128,1,float16,float16,255,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,3,0.009029333169261614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,7,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,7,0.010144000252087912
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,15,0.010735999792814255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,15,0.009706666693091393
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,31,0.00902399979531765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,63,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,31,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,63,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,1,0.009919999788204828
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,127,0.00973866693675518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,127,0.00949866697192192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,3,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,255,0.010496000448862711
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,255,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,511,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,511,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,1023,0.012847999731699625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,1023,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,4095,0.030506665507952373
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,2047,0.012831999609867731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,4095,0.016741332908471424
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,8191,0.04949333270390829
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,8191,0.03180799881617228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,16383,0.08686932921409607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,fp8,16383,0.05038933455944061
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,1,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,1,0.011237333218256632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,3,0.009717333440979322
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,3,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,7,0.01020800011853377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,15,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,15,0.010485333700974783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,31,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,31,0.010399999717871347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,2,128,1,float16,float16,2047,0.013141332815090815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,63,0.010496000448862711
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,63,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,127,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,127,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,255,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,255,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,511,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,511,0.01119999960064888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,1023,0.013376000026861826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,7,0.010533332824707031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,1023,0.012874666601419449
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,2047,0.029872000217437744
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,2047,0.015189333508412043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,4095,0.04824000100294749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,4095,0.03075733284155528
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,8191,0.0498933345079422
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,8191,0.0844106674194336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,1,0.009568000212311745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,float16,16383,0.15518933534622192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,12,4,128,1,float16,fp8,16383,0.08601066470146179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,3,0.009312000125646591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,7,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,7,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,15,0.00933333362142245
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,15,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,31,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,31,0.010682666053374609
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,63,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,63,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,127,0.009343999748428663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,127,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,255,0.009312000125646591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,255,0.008901333436369896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,511,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,511,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,1023,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,1023,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,2047,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,2047,0.01137599969903628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,4095,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,4095,0.011253333340088526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,8191,0.015450666348139444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,8191,0.015077333897352219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,16383,0.019354666272799175
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,16383,0.019391999890406925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,1,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,32767,0.02607999990383784
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,32767,0.027295999228954315
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,65535,0.029706666866938274
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,float16,65535,0.031541332602500916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,1,0.00891733355820179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,1,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,3,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,3,0.0099093330403169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,7,0.009242666885256767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,7,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,15,0.009850666547815004
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,15,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,31,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,63,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,31,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,63,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,127,0.009370666618148485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,255,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,255,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,511,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,511,0.010575999816258749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,1023,0.0102186668664217
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,1023,0.010533332824707031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,2047,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,2047,0.010640000303586325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,4095,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,4095,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,8191,0.015002666662136713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,8191,0.015226667126019796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,16383,0.019093333433071773
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,16383,0.019306667149066925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,32767,0.021333334346612293
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,1,128,1,float16,fp8,3,0.00973866693675518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,float16,65535,0.02349333216746648
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,65535,0.021370666722456615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,1,0.00877333308259646
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,1,0.008762666955590248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,3,0.00902399979531765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,3,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,7,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,7,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,127,0.010293333480755487
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,15,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,31,0.009493333597977957
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,63,0.00890666681031386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,63,0.009253333633144697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,127,0.00902399979531765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,127,0.008986666798591614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,255,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,2,128,1,float16,fp8,32767,0.021344001094500225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,255,0.0099093330403169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,511,0.010549332946538925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,511,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,1023,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,1023,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,2047,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,2047,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,4095,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,15,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,4095,0.011109333485364914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,31,0.009429333110650381
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,8191,0.015322666615247726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,16383,0.01756799966096878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,16383,0.01710933322707812
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,32767,0.019589333484570186
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,32767,0.01932799940307935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,65535,0.03786666691303253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,1,0.00943999985853831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,1,0.009408000235756239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,3,0.00933333362142245
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,3,0.009269333134094873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,7,0.009354666496316591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,7,0.010239999741315842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,15,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,15,0.009248000259200731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,31,0.009306666751702627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,31,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,63,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,63,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,float16,8191,0.014954666296641031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,127,0.009322666873534521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,127,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,255,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,255,0.009392000113924345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,511,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,511,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,1023,0.010735999792814255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,1023,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,2047,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,2047,0.01129066695769628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,4095,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,12,4,128,1,float16,fp8,65535,0.021514666577180225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,8191,0.016261332978804905
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,16383,0.020261333634455998
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,32767,0.023354666928450268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,32767,0.02312533309062322
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,65535,0.025477332373460133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,65535,0.024357333779335022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,1,0.009375999992092451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,1,0.009882666791478792
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,3,0.010053333515922228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,3,0.009893333539366722
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,4095,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,7,0.00978133330742518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,7,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,fp8,16383,0.019466667125622433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,15,0.009301333377758661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,15,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,31,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,63,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,63,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,127,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,127,0.00903466654320558
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,255,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,255,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,511,0.010474666953086853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,511,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,1023,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,1023,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,2047,0.011146667102972666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,2047,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,4095,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,4095,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,8191,0.015050667027632395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,16383,0.01738133281469345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,16383,0.017008000363906223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,1,128,1,float16,float16,8191,0.015386667102575302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,32767,0.019199999670187633
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,float16,65535,0.03640533238649368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,31,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,1,0.010384000216921171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,1,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,3,0.010293333480755487
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,3,0.011365332951148352
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,7,0.011317333827416102
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,8191,0.015237333873907724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,7,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,15,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,32767,0.019424000134070713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,31,0.009861333295702934
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,31,0.009418666362762451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,2,128,1,float16,fp8,65535,0.019381333142518997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,63,0.009904000287254652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,63,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,127,0.008885333314538002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,127,0.010101333260536194
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,255,0.010213333492477735
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,255,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,511,0.010735999792814255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,511,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,1023,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,1023,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,2047,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,4095,0.013269333789745966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,4095,0.013045333325862885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,8191,0.015482666591803232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,8191,0.014896000425020853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,16383,0.01716800034046173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,16383,0.016293333222468693
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,15,0.009253333633144697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,32767,0.03429866582155228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,32767,0.01930133377512296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,65535,0.05232533315817515
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,fp8,65535,0.03570133447647095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,1,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,1,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,3,0.011215999722480774
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,3,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,12,4,128,1,float16,float16,2047,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,7,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,15,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,15,0.010661333799362183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,31,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,31,0.009898666913310686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,63,0.009349333122372627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,63,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,127,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,127,0.011333333949247995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,255,0.012272000312805176
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,255,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,511,0.011429333438475927
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,7,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,1023,0.013194666554530462
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,1023,0.013295999417702356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,2047,0.014869333555301031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,4095,0.031871999303499855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,8191,0.0503359983364741
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,511,0.012853333105643591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,8191,0.0332640012105306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,float16,2047,0.01522133375207583
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,1,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,1,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,3,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,1,128,1,float16,fp8,4095,0.017445333302021027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,7,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,3,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,7,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,15,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,15,0.010090666512648264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,31,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,31,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,63,0.010538666198650995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,63,0.010234666367371878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,127,0.010480000327030817
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,127,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,255,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,255,0.01032533310353756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,511,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,511,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,1023,0.013093333691358566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,1023,0.012938667088747025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,2047,0.02977066735426585
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,2047,0.015087999403476715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,4095,0.03148266673088074
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,8191,0.08488000432650249
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,1,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,fp8,8191,0.04915733138720194
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,1,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,3,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,3,0.011205332974592844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,7,0.011231999844312668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,7,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,15,0.011802667131026586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,15,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,31,0.011503999431928
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,31,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,63,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,63,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,127,0.012298667182525
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,255,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,255,0.011141333729028702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,511,0.013242666920026144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,511,0.012944000462690989
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,1023,0.02961066613594691
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,1023,0.015237333873907724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,2,128,1,float16,float16,4095,0.04903466502825419
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,2047,0.0481279989083608
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,127,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,4095,0.0844586690266927
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,2047,0.03166933357715607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,1,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,1,0.010485333700974783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,float16,8191,0.15654399991035461
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,8191,0.08630399902661641
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,3,0.009706666693091393
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,3,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,7,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,7,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,15,0.008799999952316284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,15,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,31,0.010090666512648264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,31,0.008810666700204214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,63,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,127,0.008757333581646284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,127,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,255,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,255,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,511,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,511,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,1023,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,1023,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,2047,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,12,4,128,1,float16,fp8,4095,0.04976533353328705
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,2047,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,4095,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,4095,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,8191,0.01545599972208341
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,8191,0.017231999586025875
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,16383,0.018298666924238205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,16383,0.01718933383623759
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,32767,0.02065066620707512
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,32767,0.019317333896954853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,float16,65535,0.03766400118668874
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,63,0.010618666807810465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,1,0.008949333180983862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,1,0.009338666374484697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,3,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,3,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,7,0.00985599992175897
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,7,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,15,0.00943999985853831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,31,0.008986666798591614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,15,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,31,0.009328000247478485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,63,0.009002666920423508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,63,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,127,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,255,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,511,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,511,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,1023,0.009893333539366722
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,1023,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,2047,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,1,128,1,float16,fp8,65535,0.02213866760333379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,2047,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,4095,0.012954667210578918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,4095,0.013194666554530462
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,8191,0.014965333044528961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,8191,0.015274666249752045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,16383,0.016421332955360413
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,32767,0.03344533344109853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,127,0.009749333063761393
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,255,0.010415999839703241
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,65535,0.0528106689453125
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,1,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,65535,0.03571200122435888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,1,0.009962666779756546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,3,0.010437333335479101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,3,0.010559999694426855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,7,0.009402666861812273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,7,0.010202666744589806
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,15,0.009349333122372627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,15,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,31,0.009413333609700203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,31,0.010368000095089277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,float16,16383,0.01720533271630605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,63,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,63,0.00979200005531311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,127,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,255,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,255,0.009685333197315535
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,2,128,1,float16,fp8,32767,0.017125333348910015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,511,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,511,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,1023,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,1023,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,2047,0.012863999853531519
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,4095,0.014245333770910898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,4095,0.014629332969586054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,8191,0.015274666249752045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,8191,0.015381333728631338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,16383,0.03188266605138779
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,16383,0.01794133335351944
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,32767,0.05018133421738943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,32767,0.03398400048414866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,float16,65535,0.08637866377830505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,65535,0.051455999414126076
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,127,0.01055466632048289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,1,0.011231999844312668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,1,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,3,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,12,4,128,1,float16,fp8,2047,0.012986666212479273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,3,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,7,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,7,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,15,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,15,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,31,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,31,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,63,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,63,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,127,0.010575999816258749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,127,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,255,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,255,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,511,0.012495999534924826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,1023,0.012896000097195307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,511,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,1023,0.012789333860079447
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,2047,0.03057066599527995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,2047,0.01725333308180173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,float16,4095,0.04985600213209788
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,1,128,1,float16,fp8,4095,0.03169599920511246
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,1,0.011962667107582092
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,1,0.011328000575304031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,3,0.01139733319481214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,7,0.011429333438475927
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,3,0.011525332927703857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,7,0.011434666812419891
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,15,0.011141333729028702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,15,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,31,0.011616000284751257
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,31,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,63,0.01118933285276095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,127,0.012229333321253458
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,127,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,255,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,255,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,511,0.014021333307027817
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,511,0.013072000195582708
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,1023,0.03013866643110911
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,63,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,1023,0.01525866612792015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,2047,0.050010666251182556
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,float16,4095,0.08545066912968953
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,2047,0.03154666721820831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,2,128,1,float16,fp8,4095,0.05055999755859375
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,1,0.013338666409254074
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,1,0.012970666090647379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,3,0.012917333592971167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,3,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,7,0.013552000125249227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,7,0.012960000584522883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,15,0.013199999928474426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,15,0.013482666263977686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,31,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,31,0.014757333944241205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,63,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,63,0.013487999637921652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,127,0.01310933381319046
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,127,0.013631999492645264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,255,0.013007999708255133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,255,0.013104000439246496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,511,0.016906666258970898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,511,0.030799999833106995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,1023,0.04885333279768626
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,1023,0.03172266731659571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,2047,0.050885334610939026
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,2047,0.08592533071835835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,float16,4095,0.1585546632607778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,12,4,128,1,float16,fp8,4095,0.08845866719881694
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,1,0.012634667257467905
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,1,0.012847999731699625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,3,0.012975999464591345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,3,0.012965332716703415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,7,0.01301866645614306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,15,0.012815999488035837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,15,0.012901333471139273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,31,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,31,0.012810666114091873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,7,0.012997332960367203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,63,0.0129120002190272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,63,0.012842666357755661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,127,0.012805332740147909
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,127,0.013114667187134424
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,255,0.013157332936922709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,255,0.01293333371480306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,511,0.015168000012636185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,511,0.015024000157912573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,float16,1023,0.03160000095764796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,1,0.014335999886194864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,1,0.013056000073750814
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,3,0.014789332946141561
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,3,0.012965332716703415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,7,0.014069333672523499
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,1,128,1,float16,fp8,1023,0.01700266698996226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,7,0.013530666629473368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,15,0.013834666460752487
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,15,0.01332266628742218
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,31,0.014794666320085526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,31,0.013418667018413544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,63,0.012869333227475485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,63,0.01309866706530253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,127,0.01320533330241839
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,127,0.012944000462690989
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,255,0.013221333424250284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,255,0.01331199953953425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,511,0.029919999341169994
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,511,0.01700266698996226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,1,0.018698666244745255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,float16,1023,0.0498933345079422
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,2,128,1,float16,fp8,1023,0.032058666149775185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,1,0.01720533271630605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,3,0.01904533306757609
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,3,0.01704000060757001
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,7,0.018863999595244724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,7,0.017290666699409485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,15,0.018986667195955913
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,15,0.017093333105246227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,31,0.018901333212852478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,31,0.01706133286158244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,63,0.019007999449968338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,63,0.017162666966517765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,127,0.01905599981546402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,127,0.017008000363906223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,255,0.030282666285832722
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,255,0.017397332936525345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,511,0.048800001541773476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,511,0.03161599983771642
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,float16,1023,0.08479467034339905
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,12,4,128,1,float16,fp8,1023,0.05004799862702688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,float16,1,0.015034666905800501
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,fp8,1,0.015034666905800501
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,float16,3,0.015301333119471868
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,float16,7,0.015157333264748255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,fp8,3,0.015237333873907724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,fp8,7,0.014997333288192749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,float16,15,0.015461333096027374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,fp8,15,0.01509333277742068
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,float16,31,0.01505600040157636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,fp8,31,0.015103999525308609
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,float16,63,0.015210667004187902
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,fp8,63,0.01492799942692121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,fp8,127,0.014842666685581207
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,float16,127,0.015322666615247726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,float16,255,0.015322666615247726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,fp8,255,0.015082667271296183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,float16,511,0.03136000037193298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,1,128,1,float16,fp8,511,0.018992000569899876
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,float16,1,0.019215999792019527
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,fp8,1,0.017045332739750545
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,float16,3,0.018437333405017853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,fp8,3,0.017429333180189133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,float16,7,0.01911466692884763
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,fp8,7,0.017055999487638474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,float16,15,0.01859733338157336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,fp8,15,0.01701333373785019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,float16,31,0.01907733331123988
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,fp8,31,0.01709866647919019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,float16,63,0.018570666511853535
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,fp8,63,0.017029333859682083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,float16,127,0.018944000204404194
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,fp8,127,0.016949333250522614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,float16,255,0.030821333328882854
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,float16,511,0.05040533343950907
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,fp8,511,0.03182400017976761
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,2,128,1,float16,fp8,255,0.017509333789348602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,float16,1,0.027903998891512554
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,fp8,1,0.025973332424958546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,float16,3,0.027818667391935985
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,float16,7,0.027269333600997925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,fp8,3,0.025461333493391674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,fp8,7,0.026634665826956432
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,float16,15,0.027621333797772724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,float16,31,0.02808533360560735
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,fp8,31,0.026101333399613697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,fp8,15,0.025461333493391674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,float16,63,0.028602667152881622
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,fp8,63,0.02550933261712392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,float16,127,0.03472533325354258
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,fp8,127,0.025205334027608235
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,float16,255,0.05052266518274943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,1,0.00895999992887179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,fp8,255,0.03512533257404963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,1,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,3,0.009557333464423815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,7,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,3,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,7,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,15,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,15,0.008938666433095932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,31,0.010709332923094431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,31,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,63,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,63,0.00943999985853831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,127,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,127,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,255,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,255,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,511,0.011274666835864386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,fp8,511,0.05315199991067251
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,1023,0.011237333218256632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,12,4,128,1,float16,float16,511,0.08679466446240743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,2047,0.01128000020980835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,4095,0.01509333277742068
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,1023,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,4095,0.015226667126019796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,2047,0.01128000020980835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,8191,0.016496000190575916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,8191,0.015856000284353893
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,16383,0.01932799940307935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,16383,0.01721599946419398
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,32767,0.0349440003434817
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,32767,0.019754666835069656
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,float16,65535,0.05373866856098175
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,65535,0.03684266656637192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,1,0.009381333366036415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,1,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,3,0.00961599995692571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,7,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,1,128,1,float16,fp8,511,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,7,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,15,0.008837333569924036
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,15,0.009312000125646591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,31,0.00938666673998038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,31,0.00960533320903778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,63,0.009002666920423508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,63,0.009503999724984169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,127,0.00898133342464765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,255,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,255,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,511,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,511,0.011183999478816986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,3,0.009338666374484697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,1023,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,2047,0.013050666699806849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,1023,0.011221333096424738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,2047,0.01301866645614306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,4095,0.013013333082199097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,4095,0.014245333770910898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,8191,0.015087999403476715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,16383,0.01703466723362605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,32767,0.050341332952181496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,32767,0.03322133421897888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,127,0.009375999992092451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,65535,0.08664533495903015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,fp8,65535,0.0516533354918162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,1,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,1,0.00973866693675518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,3,0.008965333302815756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,3,0.010362666721145311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,7,0.00873066671192646
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,7,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,15,0.00867733359336853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,15,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,8191,0.01533866673707962
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,31,0.00897066667675972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,31,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,63,0.009413333609700203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,63,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,2,128,1,float16,float16,16383,0.03258133431275686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,127,0.009472000102202097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,127,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,255,0.01051733394463857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,511,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,511,0.011146667102972666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,1023,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,1023,0.011258666714032492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,2047,0.012784000486135483
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,2047,0.012810666114091873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,4095,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,4095,0.01463466634353002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,8191,0.029792000850041706
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,8191,0.015200000256299973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,16383,0.04909333089987437
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,32767,0.08453333377838135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,32767,0.049973333875338234
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,65535,0.15651200215021768
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,65535,0.08738666772842407
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,float16,255,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,12,4,128,1,float16,fp8,16383,0.0314026673634847
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,float16,1,0.021370666722456615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,fp8,1,0.019413333386182785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,float16,3,0.021125334004561108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,fp8,3,0.01934933289885521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,float16,7,0.02109333376089732
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,float16,15,0.021429332594076794
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,fp8,7,0.01932799940307935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,fp8,15,0.019258666783571243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,float16,31,0.02146666745344798
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,fp8,31,0.019071999937295914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,fp8,63,0.019610666980346043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,float16,63,0.02179199953873952
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,fp8,127,0.019002666076024372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,float16,255,0.03139200061559677
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,float16,127,0.021295999487241108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,1,128,1,float16,fp8,255,0.019146667172511418
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,float16,1,0.027797333896160126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,fp8,1,0.027136000494162243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,float16,3,0.02755733331044515
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,float16,7,0.027850667635599773
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,fp8,3,0.027386667827765148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,fp8,7,0.027119999130566914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,float16,15,0.027903998891512554
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,fp8,15,0.02553066611289978
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,float16,31,0.02918400118748347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,fp8,31,0.02720000098148982
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,float16,63,0.027669332921504974
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,fp8,63,0.025333332518736523
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,float16,127,0.035546667873859406
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,float16,255,0.05179200073083242
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,fp8,127,0.027136000494162243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,2,128,1,float16,fp8,255,0.03585600107908249
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,fp8,1,0.04164266586303711
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,float16,1,0.05017066498597463
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,float16,3,0.05037866532802582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,fp8,3,0.04160533348719279
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,float16,7,0.050106664498647056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,fp8,7,0.04186666508515676
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,float16,15,0.050000001986821495
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,fp8,15,0.04174399872620901
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,fp8,31,0.0417546679576238
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,float16,63,0.05077866713205973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,float16,31,0.05050666630268097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,fp8,63,0.041936000188191734
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,fp8,127,0.04766400158405304
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,float16,127,0.05745600163936615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,float16,255,0.09058133761088054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,12,4,128,1,float16,fp8,255,0.058133333921432495
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,float16,1,0.035589332381884255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,fp8,1,0.029722665747006733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,float16,3,0.035487999518712364
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,fp8,3,0.029333333174387615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,float16,7,0.03389866650104523
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,float16,15,0.03551466763019562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,fp8,15,0.029311999678611755
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,fp8,7,0.029701332251230877
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,float16,31,0.03518400092919668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,fp8,31,0.029279999434947968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,float16,63,0.03397866586844126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,fp8,63,0.02961066613594691
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,float16,127,0.03789866715669632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,1,128,1,float16,fp8,127,0.029839999973773956
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,float16,1,0.050016000866889954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,fp8,1,0.04387733340263367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,float16,3,0.05093333125114441
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,fp8,3,0.04271999994913737
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,float16,7,0.05006400247414907
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,fp8,7,0.042133331298828125
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,fp8,15,0.04171733558177948
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,float16,31,0.04986133178075155
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,float16,15,0.05086933573087057
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,float16,63,0.05020800232887268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,float16,127,0.05977599819501241
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,fp8,31,0.04242133100827535
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,fp8,63,0.04193066557248434
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,2,128,1,float16,fp8,127,0.04782933493455251
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,float16,1,0.08975999553998311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,fp8,1,0.07977066437403361
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,float16,3,0.0893440047899882
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,fp8,3,0.07957333326339722
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,float16,7,0.09050666292508443
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,float16,15,0.08865599830945332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,fp8,7,0.08024533092975616
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,fp8,15,0.078575998544693
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,float16,31,0.09067199627558391
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,fp8,31,0.0792799989382426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,float16,127,0.10436800122261047
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,fp8,63,0.08070933322111766
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,1,0.010122666756312052
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,3,0.01007466639081637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,1,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,3,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,7,0.009503999724984169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,7,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,15,0.010053333515922228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,15,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,31,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,31,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,63,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,fp8,127,0.0830506682395935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,127,0.009269333134094873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,127,0.010015999898314476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,255,0.009701333319147428
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,255,0.009296000003814697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,511,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,511,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,1023,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,1023,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,2047,0.012853333105643591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,2047,0.012917333592971167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,4095,0.015040000279744467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,4095,0.015040000279744467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,8191,0.01721599946419398
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,8191,0.01739199956258138
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,16383,0.03339199970165888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,16383,0.019093333433071773
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,63,0.010384000216921171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,float16,32767,0.05180266499519348
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,1,128,1,float16,fp8,32767,0.035530666510264076
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,1,0.009343999748428663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,1,0.009461333354314169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,3,0.008997333546479544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,3,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,7,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,7,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,15,0.009269333134094873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,15,0.009872000043590864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,31,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,31,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,63,0.009269333134094873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,63,0.009290666629870733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,127,0.010005333150426546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,127,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,255,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,255,0.009322666873534521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,511,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,1023,0.011258666714032492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,12,4,128,1,float16,float16,63,0.09018666545550029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,1023,0.011391999820868174
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,2047,0.013210666676362356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,4095,0.014890667051076889
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,2047,0.012741333494583765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,4095,0.01339200014869372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,8191,0.030368000268936157
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,8191,0.016021333634853363
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,16383,0.03141333411137263
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,16383,0.048997332652409874
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,511,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,float16,32767,0.08667733271916707
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,2,128,1,float16,fp8,32767,0.05086933573087057
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,1,0.009269333134094873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,1,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,3,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,7,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,7,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,3,0.01020800011853377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,15,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,15,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,31,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,31,0.009743999689817429
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,63,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,63,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,127,0.010645333677530289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,127,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,255,0.008933333059151968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,255,0.011440000186363855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,511,0.011296000331640244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,511,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,1023,0.01157333329319954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,1023,0.013007999708255133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,2047,0.014943999548753103
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,2047,0.013077333569526672
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,4095,0.0308746670683225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,4095,0.01747200017174085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,8191,0.04842133323351542
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,8191,0.03146666785081228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,16383,0.08429333567619324
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,16383,0.05036800106366476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,fp8,32767,0.08689600229263306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,12,4,128,1,float16,float16,32767,0.156058669090271
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,1,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,1,0.009525333220760027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,3,0.008810666700204214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,3,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,7,0.008954666554927826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,7,0.009413333609700203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,15,0.008752000207702318
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,15,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,31,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,31,0.00972800018886725
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,63,0.00879466657837232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,63,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,127,0.008885333314538002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,127,0.010581333190202713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,255,0.008901333436369896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,255,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,511,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,511,0.01116266722480456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,1023,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,1023,0.011215999722480774
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,2047,0.013061333447694778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,2047,0.012863999853531519
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,4095,0.014922666052977243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,8191,0.031141333281993866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,8191,0.016762666404247284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,16383,0.031914666295051575
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,32767,0.04980266590913137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,32767,0.08715732892354329
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,1,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,1,0.010559999694426855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,3,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,3,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,7,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,7,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,fp8,4095,0.013221333424250284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,15,0.010640000303586325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,1,128,1,float16,float16,16383,0.04929600159327189
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,31,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,63,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,63,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,127,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,127,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,255,0.008874666566650072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,255,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,511,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,511,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,15,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,1023,0.012853333105643591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,1023,0.012042666474978128
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,2047,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,2047,0.012874666601419449
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,4095,0.029706666866938274
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,31,0.01027199998497963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,4095,0.014975999792416891
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,8191,0.04877333343029022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,8191,0.03151999910672506
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,16383,0.0491839994986852
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,16383,0.08596799770991008
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,float16,32767,0.15851733088493347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,1,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,2,128,1,float16,fp8,32767,0.08618133266766866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,1,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,3,0.010384000216921171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,7,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,7,0.010650667051474253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,15,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,15,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,31,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,31,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,63,0.00955200009047985
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,63,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,127,0.010239999741315842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,255,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,127,0.01137599969903628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,255,0.010666667173306147
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,511,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,3,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,511,0.011589333415031433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,1023,0.012896000097195307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,1023,0.012890666723251343
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,2047,0.03002133220434189
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,2047,0.015850666910409927
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,4095,0.04780800143877665
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,4095,0.030165334542592365
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,8191,0.08505066235860188
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,8191,0.049829334020614624
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,16383,0.08592533071835835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,16383,0.15545599659283957
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,1,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,1,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,fp8,32767,0.15776532888412476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,4,128,1,float16,float16,32767,0.29916266600290936
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,3,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,3,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,15,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,15,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,7,0.011274666835864386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,7,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,31,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,31,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,63,0.011546666423479715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,63,0.011333333949247995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,127,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,127,0.011173332730929056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,255,0.011359999577204386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,255,0.01126933346192042
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,511,0.012800000607967377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,511,0.013157332936922709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,1023,0.02956799914439519
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,1023,0.01532799998919169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,2047,0.030576000610987347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,2047,0.0476800004641215
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,4095,0.04930666585763296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,4095,0.08438400427500407
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,8191,0.08483200271924336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,8191,0.1562986671924591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,16383,0.15689599514007568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,16383,0.3011626601219177
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,1,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,1,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,fp8,32767,0.3027413288752238
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,3,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,8,8,128,1,float16,float16,32767,0.5920213460922241
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,3,0.009743999689817429
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,7,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,7,0.008976000050703684
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,15,0.009402666861812273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,15,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,31,0.009365333244204521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,31,0.008725333337982496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,63,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,127,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,127,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,255,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,255,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,511,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,511,0.011125333607196808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,1023,0.009472000102202097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,1023,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,2047,0.010602666685978571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,2047,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,4095,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,4095,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,8191,0.015173333386580149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,16383,0.017231999586025875
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,16383,0.019205333044131596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,32767,0.024959998826185863
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,32767,0.025546667476495106
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,65535,0.027999999622503918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,65535,0.029466666281223297
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,float16,131071,0.030042665700117748
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,131071,0.031530665854612984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,1,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,1,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,3,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,3,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,7,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,7,0.00927466650803884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,15,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,63,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,15,0.009232000137368837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,31,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,31,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,63,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,63,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,127,0.008954666554927826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,127,0.009402666861812273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,1,128,1,float16,fp8,8191,0.014581333845853806
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,255,0.009290666629870733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,511,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,511,0.011146667102972666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,1023,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,1023,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,2047,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,2047,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,4095,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,4095,0.01121066634853681
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,8191,0.014085333794355392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,8191,0.015189333508412043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,16383,0.0173333336909612
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,16383,0.01937066639463107
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,32767,0.021183999876181286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,32767,0.02109333376089732
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,65535,0.023269332945346832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,65535,0.021242665747801464
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,131071,0.03896533449490865
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,fp8,131071,0.023711999257405598
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,1,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,1,0.009370666618148485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,3,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,2,128,1,float16,float16,255,0.00901333304742972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,7,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,7,0.01003200002014637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,15,0.009306666751702627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,15,0.009919999788204828
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,31,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,63,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,63,0.008890666688481966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,127,0.008997333546479544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,127,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,255,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,255,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,511,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,511,0.011445333560307821
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,1023,0.011695999652147293
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,3,0.009461333354314169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,2047,0.01121066634853681
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,2047,0.010522666076819101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,4095,0.011530666301647821
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,4095,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,8191,0.017968000223239262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,31,0.009808000177145004
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,8191,0.015157333264748255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,16383,0.01695999999841054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,16383,0.0176959993938605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,32767,0.019653332730134327
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,32767,0.018885333091020584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,65535,0.02091199904680252
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,65535,0.037674665451049805
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,131071,0.05628266433874766
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,fp8,131071,0.03841066608826319
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,1,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,1,0.009637333452701569
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,3,0.00927466650803884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,3,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,7,0.009999999776482582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,7,0.009509333098928133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,15,0.008938666433095932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,15,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,31,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,63,0.009242666885256767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,63,0.008949333180983862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,127,0.009232000137368837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,127,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,255,0.010202666744589806
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,255,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,511,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,511,0.010591999938090643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,1023,0.010640000303586325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,2047,0.010656000425418219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,2047,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,4095,0.013616000612576803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,4,128,1,float16,float16,1023,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,4095,0.013189333180586496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,8191,0.014981333166360855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,31,0.009610666582981745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,8191,0.01505600040157636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,16383,0.016976000120242436
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,16383,0.01695466662446658
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,32767,0.033413333197434746
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,32767,0.017968000223239262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,65535,0.03514666606982549
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,65535,0.051311999559402466
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,131071,0.08653333783149719
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,1,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,1,0.008976000050703684
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,3,0.009285333255926767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,float16,1023,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,3,0.009429333110650381
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,7,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,7,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,15,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,15,0.010677333921194077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,31,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,31,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,63,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,127,0.009962666779756546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,127,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,255,0.010618666807810465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,255,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,8,8,128,1,float16,fp8,131071,0.05231466889381409
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,511,0.011418666690587997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,1023,0.010709332923094431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,511,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,2047,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,1023,0.011557333171367645
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,2047,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,4095,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,4095,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,8191,0.015119999647140503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,8191,0.01504533365368843
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,16383,0.018917333334684372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,16383,0.01915733392039935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,32767,0.021221332252025604
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,63,0.009242666885256767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,32767,0.019482667247454327
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,65535,0.023498666783173878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,65535,0.02146666745344798
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,fp8,131071,0.023631999890009563
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,1,128,1,float16,float16,131071,0.03980266551176707
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,1,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,1,0.00983466642598311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,3,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,7,0.009029333169261614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,7,0.009637333452701569
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,15,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,15,0.010570666442314783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,31,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,31,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,63,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,63,0.009301333377758661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,127,0.008816000074148178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,127,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,255,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,511,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,511,0.011482667177915573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,1023,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,1023,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,2047,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,4095,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,4095,0.011183999478816986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,8191,0.01532799998919169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,8191,0.01532799998919169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,3,0.00897066667675972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,16383,0.01752000053723653
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,16383,0.017130666722853977
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,32767,0.019866666446129482
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,32767,0.019082666685183842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,65535,0.036533333361148834
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,65535,0.02072000006834666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,255,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,131071,0.05509333312511444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,1,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,3,0.009338666374484697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,float16,2047,0.009797333429257074
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,3,0.010474666953086853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,7,0.009301333377758661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,7,0.009375999992092451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,15,0.00890666681031386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,15,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,31,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,31,0.009482666850090027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,63,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,63,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,127,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,127,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,255,0.009343999748428663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,255,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,511,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,511,0.01009599988659223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,1023,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,2,128,1,float16,fp8,131071,0.03852800031503042
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,2047,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,2047,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,1,0.010650667051474253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,4095,0.012810666114091873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,4095,0.013125333935022354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,8191,0.014981333166360855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,8191,0.014965333044528961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,16383,0.01684800038735072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,16383,0.0169813334941864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,32767,0.033743999898433685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,65535,0.051728000243504844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,65535,0.034485332667827606
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,float16,131071,0.08664533495903015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,131071,0.05256533126036326
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,1,0.009285333255926767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,1,0.010389333590865135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,3,0.009269333134094873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,1023,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,3,0.010224000240365664
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,7,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,7,0.009743999689817429
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,15,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,15,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,4,128,1,float16,fp8,32767,0.01759999990463257
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,31,0.009242666885256767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,31,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,63,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,63,0.009002666920423508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,127,0.010389333590865135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,127,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,255,0.009242666885256767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,255,0.009455999980370203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,511,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,511,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,1023,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,1023,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,4095,0.013045333325862885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,2047,0.013253333667914072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,8191,0.015402667224407196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,16383,0.03311466674009959
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,16383,0.01777600000301997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,32767,0.04985600213209788
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,32767,0.033439998825391136
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,65535,0.08637866377830505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,65535,0.05093333125114441
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,2047,0.012736000120639801
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,4095,0.014549333602190018
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,131071,0.15683733423550925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,float16,8191,0.015098666151364645
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,8,8,128,1,float16,fp8,131071,0.08658132950464885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,1,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,1,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,3,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,3,0.009296000003814697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,7,0.010224000240365664
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,15,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,7,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,15,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,31,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,31,0.010368000095089277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,63,0.010650667051474253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,63,0.00961599995692571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,127,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,127,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,255,0.010186666622757912
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,255,0.010197333370645842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,511,0.01121066634853681
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,511,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,1023,0.012730666746695837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,1023,0.012416000167528788
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,2047,0.014917333920796713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,2047,0.012975999464591345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,4095,0.01525866612792015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,8191,0.049925332268079124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,8191,0.03136000037193298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,fp8,16383,0.05114666620890299
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,1,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,16383,0.08639466762542725
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,1,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,3,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,3,0.010464000205198923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,7,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,7,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,15,0.009674666449427605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,15,0.010522666076819101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,31,0.01044800008336703
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,1,128,1,float16,float16,4095,0.031066666046778362
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,63,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,63,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,127,0.010602666685978571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,127,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,255,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,255,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,511,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,1023,0.01313599944114685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,1023,0.012821332861979803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,31,0.0106133334338665
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,2047,0.0296426663796107
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,2047,0.015077333897352219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,4095,0.04797866443792979
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,8191,0.04828799764315287
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,4095,0.031311998764673867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,8191,0.08531733353932698
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,511,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,float16,16383,0.15818666418393454
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,1,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,1,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,3,0.011120000233252844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,7,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,7,0.010735999792814255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,15,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,15,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,31,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,31,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,2,128,1,float16,fp8,16383,0.08585600058237712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,63,0.011183999478816986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,3,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,63,0.0116799995303154
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,127,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,127,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,255,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,255,0.012074666718641916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,511,0.015066667149464289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,511,0.012928000340859095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,1023,0.01568000018596649
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,2047,0.0317546675602595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,1023,0.03049066662788391
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,2047,0.048714667558670044
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,4095,0.04959466556708018
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,4095,0.08499733606974284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,8191,0.08681066830952962
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,8191,0.1574720044930776
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,float16,16383,0.30082132418950397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,4,128,1,float16,fp8,16383,0.16128533085187277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,1,0.013199999928474426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,1,0.013530666629473368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,3,0.01292266696691513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,3,0.013221333424250284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,7,0.014997333288192749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,7,0.013338666409254074
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,15,0.013967999567588171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,15,0.012928000340859095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,31,0.015061333775520325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,31,0.013242666920026144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,63,0.013317332913478216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,63,0.013301332791646322
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,127,0.013728000223636627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,127,0.013114667187134424
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,255,0.013221333424250284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,255,0.013045333325862885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,1023,0.048826664686203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,511,0.030080000559488933
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,511,0.01659199967980385
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,1023,0.03177600105603536
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,2047,0.08525333801905315
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,2047,0.05020266771316528
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,4095,0.08681066830952962
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,4095,0.15920533736546835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,8191,0.30477867523829144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,8191,0.1585653324921926
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,1,0.008965333302815756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,1,0.009370666618148485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,fp8,16383,0.30457599957784015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,3,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,7,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,7,0.009434666484594345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,15,0.009029333169261614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,15,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,31,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,31,0.010506667196750641
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,63,0.008890666688481966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,63,0.010581333190202713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,127,0.008816000074148178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,127,0.010506667196750641
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,255,0.008778666456540426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,255,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,511,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,511,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,1023,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,2047,0.010496000448862711
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,2047,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,4095,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,3,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,8191,0.014970666418472925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,8,8,128,1,float16,float16,16383,0.6005866527557373
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,8191,0.01523200049996376
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,16383,0.017071999609470367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,16383,0.017290666699409485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,32767,0.01893866683046023
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,32767,0.01894933357834816
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,65535,0.03537066777547201
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,65535,0.021482666333516438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,float16,131071,0.05395199855168661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,131071,0.036373332142829895
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,1,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,1,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,3,0.009008000294367472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,3,0.009008000294367472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,7,0.008837333569924036
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,7,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,1023,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,15,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,15,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,31,0.009722666814923286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,63,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,63,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,1,128,1,float16,fp8,4095,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,127,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,127,0.009829333052039146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,255,0.00879466657837232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,255,0.009392000113924345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,511,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,511,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,1023,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,1023,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,2047,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,2047,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,4095,0.012938667088747025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,4095,0.012944000462690989
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,8191,0.014896000425020853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,8191,0.015290666371583939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,16383,0.01700266698996226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,16383,0.015557333827018738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,32767,0.03367999941110611
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,32767,0.017674667139848072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,65535,0.05150933563709259
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,65535,0.035690667728583016
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,fp8,131071,0.054416000843048096
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,131071,0.0874826709429423
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,2,128,1,float16,float16,31,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,1,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,1,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,3,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,3,0.010319999729593595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,7,0.008890666688481966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,15,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,7,0.011296000331640244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,31,0.00895999992887179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,31,0.011120000233252844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,63,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,63,0.0102613332370917
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,127,0.008896000062425932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,127,0.010496000448862711
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,255,0.009306666751702627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,255,0.008965333302815756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,511,0.011605333536863327
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,511,0.011706666400035223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,1023,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,2047,0.01292266696691513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,2047,0.013072000195582708
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,4095,0.01301866645614306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,8191,0.015082667271296183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,8191,0.015226667126019796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,16383,0.03188266605138779
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,16383,0.017237332959969837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,15,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,32767,0.05004266897837321
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,32767,0.033674667278925575
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,65535,0.085807998975118
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,65535,0.0517546683549881
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,131071,0.08872000376383464
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,131071,0.15657599767049155
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,1,0.008896000062425932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,1,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,fp8,1023,0.011509332805871964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,3,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,3,0.008810666700204214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,4,128,1,float16,float16,4095,0.014874666929244995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,7,0.010309333602587381
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,15,0.009253333633144697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,15,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,31,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,31,0.010405333091815313
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,63,0.009466666728258133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,63,0.010410666465759277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,127,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,127,0.010666667173306147
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,255,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,255,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,511,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,511,0.01138666644692421
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,1023,0.01116266722480456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,1023,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,2047,0.012794667234023413
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,2047,0.012960000584522883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,4095,0.015061333775520325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,4095,0.01341333364446958
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,8191,0.015178666760524115
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,8191,0.030213333666324615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,16383,0.049039999643961586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,16383,0.0308746670683225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,32767,0.08363733688990276
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,32767,0.04956266780694326
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,7,0.008896000062425932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,65535,0.15585600336392721
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,65535,0.08561600248018901
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,fp8,131071,0.15601600209871927
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,8,8,128,1,float16,float16,131071,0.29975465933481854
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,1,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,1,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,3,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,3,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,7,0.010735999792814255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,7,0.010522666076819101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,15,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,15,0.010501333822806677
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,31,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,31,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,63,0.01051733394463857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,127,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,127,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,255,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,255,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,511,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,511,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,1023,0.012698666503032049
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,63,0.010709332923094431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,1023,0.012997332960367203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,2047,0.030415999392668407
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,2047,0.015178666760524115
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,4095,0.049413333336512245
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,4095,0.030805334448814392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,float16,8191,0.08611200253168742
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,1,128,1,float16,fp8,8191,0.05035200218359629
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,1,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,1,0.011285333583752314
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,3,0.011424000064531961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,3,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,7,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,7,0.011274666835864386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,15,0.011343999455372492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,15,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,31,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,63,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,63,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,31,0.011674666156371435
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,127,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,127,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,255,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,255,0.012144000579913458
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,511,0.013178666432698568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,511,0.013088000317414602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,1023,0.02938133229811986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,1023,0.01618133361140887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,2047,0.04901333153247833
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,2047,0.031343999008337654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,4095,0.05026133358478546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,fp8,8191,0.08729066451390584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,8191,0.1595306694507599
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,1,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,1,0.015130666395028433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,3,0.01479999969402949
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,2,128,1,float16,float16,4095,0.08588799834251404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,3,0.012863999853531519
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,7,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,7,0.015002666662136713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,15,0.012890666723251343
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,31,0.014842666685581207
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,31,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,63,0.01471466695268949
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,63,0.013093333691358566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,127,0.014293332894643148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,127,0.013002666334311167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,15,0.013125333935022354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,255,0.014805333067973455
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,255,0.01333333303531011
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,511,0.02995733420054118
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,511,0.016869333883126576
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,1023,0.04842666784922282
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,2047,0.08534933129946391
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,1023,0.03160533308982849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,2047,0.050026665131251015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,4095,0.15954132874806723
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,4095,0.08667733271916707
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,fp8,8191,0.16089600324630737
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,4,128,1,float16,float16,8191,0.30557332436243695
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,1,0.019039999693632126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,3,0.018186666071414948
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,7,0.018191999445358913
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,3,0.01701333373785019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,7,0.01703466723362605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,15,0.018863999595244724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,15,0.016970666746298473
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,31,0.017242666333913803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,1,0.01693333312869072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,31,0.01720000058412552
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,63,0.019215999792019527
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,63,0.01704000060757001
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,127,0.01785600061217944
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,127,0.01701333373785019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,255,0.029535998900731403
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,255,0.016943999876578648
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,511,0.04807466765244802
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,511,0.031386665999889374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,1023,0.04877333343029022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,1023,0.08409066994984944
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,2047,0.15424000223477682
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,2047,0.0851146678129832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,4095,0.29763732353846234
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,4095,0.15523733695348105
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,float16,8191,0.5804373423258463
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,8,8,128,1,float16,fp8,8191,0.29869866371154785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,1,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,1,0.011183999478816986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,3,0.011600000162919363
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,3,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,7,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,7,0.011301333705584208
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,15,0.011242666592200598
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,15,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,31,0.011173332730929056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,31,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,63,0.011178666104873022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,63,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,127,0.011242666592200598
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,127,0.011152000476916632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,255,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,255,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,511,0.013295999417702356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,511,0.013178666432698568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,1023,0.03032533327738444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,float16,2047,0.04901333153247833
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,2047,0.0316746657093366
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,1,128,1,float16,fp8,1023,0.016330666840076447
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,1,0.013983999689420065
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,1,0.013167999684810638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,3,0.016800000021855038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,7,0.013909333695967993
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,3,0.013786666095256805
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,7,0.013007999708255133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,15,0.013002666334311167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,15,0.014938666174809137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,31,0.012938667088747025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,31,0.013631999492645264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,63,0.014485333114862442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,63,0.013301332791646322
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,127,0.014287999520699183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,127,0.013605333864688873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,255,0.013967999567588171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,255,0.01369599997997284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,511,0.030805334448814392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,511,0.01740266631046931
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,1023,0.049584001302719116
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,1023,0.032933334509531655
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,float16,2047,0.08734400073687236
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,2,128,1,float16,fp8,2047,0.05020800232887268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,1,0.017136000096797943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,1,0.017194667210181553
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,3,0.01918399954835574
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,7,0.01801066721479098
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,3,0.01741333305835724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,7,0.017504000415404636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,15,0.019274666905403137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,15,0.017082666357358296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,31,0.019333332777023315
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,31,0.01720533271630605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,63,0.01926400015751521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,63,0.017050666113694508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,127,0.01923199991385142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,127,0.0170666662355264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,255,0.02998399982849757
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,255,0.017418666432301205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,511,0.048394665122032166
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,511,0.03173333406448364
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,1023,0.08398933211962382
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,1023,0.04990933338801066
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,float16,2047,0.15570132931073508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,4,128,1,float16,fp8,2047,0.08646933237711589
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,float16,1,0.027189334233601887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,fp8,1,0.025424001117547352
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,float16,3,0.02752533306678136
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,fp8,3,0.02532266577084859
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,float16,7,0.02757866680622101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,fp8,7,0.02532800038655599
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,float16,15,0.027679999669392902
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,fp8,15,0.025216000775496166
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,float16,31,0.027232001225153606
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,fp8,31,0.025242666403452556
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,float16,63,0.027349332968393963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,fp8,63,0.025146665672461193
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,float16,127,0.03152533372243246
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,fp8,127,0.025258667767047882
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,float16,255,0.048485333720842995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,fp8,255,0.032602667808532715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,float16,511,0.08469333251317342
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,fp8,511,0.05027199784914652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,float16,1023,0.15495466192563376
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,fp8,1023,0.08598400155703227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,float16,2047,0.29662932952245075
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,8,8,128,1,float16,fp8,2047,0.1562933325767517
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,float16,1,0.014842666685581207
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,fp8,1,0.013455999394257864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,float16,3,0.013199999928474426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,fp8,3,0.012991999586423239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,float16,7,0.01443733274936676
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,fp8,7,0.013376000026861826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,float16,15,0.01509333277742068
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,fp8,15,0.013007999708255133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,float16,31,0.014965333044528961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,fp8,31,0.013557333499193192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,float16,63,0.014645333091417948
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,fp8,63,0.013221333424250284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,float16,127,0.01471466695268949
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,fp8,127,0.013557333499193192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,float16,255,0.014730667074521383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,fp8,255,0.013503999759753546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,float16,511,0.03163733333349228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,fp8,511,0.016864000509182613
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,float16,1023,0.05045333504676819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,1,128,1,float16,fp8,1023,0.032442666590213776
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,float16,1,0.01907733331123988
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,fp8,1,0.01766933376590411
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,float16,3,0.01924266666173935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,fp8,3,0.01729600007335345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,float16,7,0.01903466631968816
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,fp8,7,0.017162666966517765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,float16,15,0.01932266727089882
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,fp8,15,0.017157333592573803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,float16,31,0.018917333334684372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,fp8,31,0.017157333592573803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,fp8,63,0.01741333305835724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,float16,63,0.01941866676012675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,float16,127,0.019280000279347103
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,fp8,127,0.0173333336909612
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,float16,255,0.031104000906149547
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,float16,511,0.04896000027656555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,fp8,255,0.017802666872739792
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,fp8,511,0.03195200115442276
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,float16,1,0.028202667832374573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,float16,1023,0.08662399649620056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,2,128,1,float16,fp8,1023,0.04993600149949392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,fp8,1,0.025888000925381977
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,float16,3,0.027727998793125153
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,fp8,3,0.026528000831604004
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,float16,7,0.02754666656255722
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,fp8,7,0.026928000152111053
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,float16,15,0.028042666614055634
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,float16,31,0.02757866680622101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,fp8,15,0.02640533447265625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,fp8,31,0.026176000634829204
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,fp8,63,0.025786665578683216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,float16,63,0.027530667682488758
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,float16,127,0.035562666753927864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,fp8,127,0.02603733291228612
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,float16,255,0.050944000482559204
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,fp8,255,0.035088000198205314
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,float16,511,0.0881173312664032
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,fp8,511,0.05320000151793162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,float16,1,0.0484799991051356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,float16,1023,0.15728533267974854
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,4,128,1,float16,fp8,1023,0.08958933750788371
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,fp8,1,0.04002666721741358
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,float16,3,0.048432002464930214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,fp8,7,0.03984000037113825
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,fp8,3,0.039647998909155525
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,float16,7,0.04826666911443075
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,float16,15,0.048357332746187844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,fp8,15,0.04001066585381826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,float16,31,0.04841599861780802
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,fp8,31,0.039834665755430855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,float16,63,0.049466664592425026
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,fp8,63,0.040005333721637726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,float16,127,0.05409066875775655
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,fp8,127,0.044010668992996216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,float16,255,0.08737066388130188
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,fp8,255,0.05518400172392527
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,float16,511,0.1572480003039042
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,fp8,511,0.08859200278917949
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,float16,1023,0.2998879949251811
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,1,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,1,0.009392000113924345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,3,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,7,0.00895999992887179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,7,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,15,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,15,0.00901333304742972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,31,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,31,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,63,0.00901333304742972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,63,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,8,8,128,1,float16,fp8,1023,0.15944533546765646
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,127,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,127,0.009328000247478485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,255,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,511,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,255,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,511,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,1023,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,1023,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,2047,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,2047,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,4095,0.013056000073750814
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,4095,0.013295999417702356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,8191,0.016565332810084026
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,8191,0.01482133318980535
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,16383,0.0173333336909612
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,16383,0.016986666868130367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,32767,0.033359999457995095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,32767,0.01851733277241389
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,65535,0.0518453319867452
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,65535,0.03480533262093862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,3,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,float16,131071,0.08888000249862671
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,1,128,1,float16,fp8,131071,0.053317333261171974
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,1,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,1,0.009381333366036415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,3,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,3,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,7,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,7,0.009290666629870733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,15,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,15,0.009637333452701569
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,31,0.009488000224033991
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,63,0.008997333546479544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,63,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,127,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,127,0.009952000031868616
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,255,0.00938666673998038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,255,0.0106133334338665
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,511,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,511,0.011343999455372492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,1023,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,1023,0.011215999722480774
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,2047,0.013114667187134424
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,4095,0.014709333578745524
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,4095,0.01331199953953425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,31,0.008805333326260248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,8191,0.015439999600251516
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,8191,0.014922666052977243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,16383,0.03229333211978277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,16383,0.01693333312869072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,32767,0.033904001116752625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,32767,0.05153066913286845
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,65535,0.08739200234413147
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,65535,0.05237866441408793
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,fp8,131071,0.08803199728329976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,2047,0.012757333616415659
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,1,0.010464000205198923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,1,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,3,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,3,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,7,0.008997333546479544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,7,0.013317332913478216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,15,0.01003200002014637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,15,0.008826666822036108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,31,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,31,0.0103946669648091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,63,0.008810666700204214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,63,0.010629333555698395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,127,0.00984533317387104
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,127,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,255,0.009472000102202097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,255,0.00956266683836778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,511,0.01118933285276095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,511,0.01116266722480456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,1023,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,1023,0.011205332974592844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,2047,0.013162666310866674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,2047,0.012506666282812754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,4095,0.015008000036080679
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,4095,0.013647999614477158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,8191,0.02972800036271413
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,2,128,1,float16,float16,131071,0.15970666209856668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,8191,0.016309333344300587
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,16383,0.04799999793370565
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,16383,0.03156800071398417
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,32767,0.0844586690266927
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,32767,0.050016000866889954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,65535,0.08690133690834045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,65535,0.15559466679890951
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,1,0.009557333464423815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,1,0.008986666798591614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,float16,131071,0.3001599907875061
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,4,128,1,float16,fp8,131071,0.1599573294321696
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,3,0.008816000074148178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,3,0.009503999724984169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,7,0.009434666484594345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,7,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,15,0.00891733355820179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,15,0.01028266673286756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,31,0.009445333232482275
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,63,0.009685333197315535
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,63,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,31,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,127,0.010629333555698395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,127,0.010496000448862711
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,255,0.00895999992887179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,255,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,511,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,1023,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,1023,0.012885333349307379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,2047,0.013045333325862885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,2047,0.01293333371480306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,4095,0.030432000756263733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,4095,0.015077333897352219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,8191,0.04799999793370565
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,8191,0.03252800057331721
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,16383,0.08373866478602092
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,16383,0.0484746644894282
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,32767,0.08443199594815572
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,32767,0.156442662080129
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,65535,0.2999626596768697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,65535,0.1560373306274414
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,131071,0.29820799827575684
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,float16,131071,0.5893386602401733
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,8,8,128,1,float16,fp8,511,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,float16,1,0.019130667050679524
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,fp8,1,0.01706133286158244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,fp8,3,0.017130666722853977
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,float16,7,0.019071999937295914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,fp8,7,0.017071999609470367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,float16,15,0.019130667050679524
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,float16,3,0.01903466631968816
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,fp8,15,0.01703466723362605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,float16,31,0.01905599981546402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,fp8,31,0.017360000560681026
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,float16,63,0.019333332777023315
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,fp8,63,0.016976000120242436
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,float16,127,0.018965333700180054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,fp8,127,0.01730666682124138
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,fp8,255,0.01695466662446658
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,float16,255,0.03123733401298523
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,float16,511,0.04991999765237173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,1,128,1,float16,fp8,511,0.03196800003449122
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,float16,1,0.027327999472618103
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,float16,3,0.028304000695546467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,fp8,1,0.025626666843891144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,fp8,3,0.025450666745503742
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,float16,7,0.02777066578467687
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,fp8,7,0.02555199960867564
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,fp8,15,0.025562666356563568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,float16,15,0.02805333336194356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,float16,31,0.027962667246659596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,fp8,31,0.02553066611289978
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,float16,63,0.02752000093460083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,float16,127,0.03537066777547201
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,fp8,63,0.02587733417749405
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,fp8,127,0.027050666511058807
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,float16,255,0.051274667183558144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,fp8,255,0.03558400024970373
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,fp8,511,0.053770666321118675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,float16,1,0.05009600023428599
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,fp8,1,0.041850666205088295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,2,128,1,float16,float16,511,0.08865066369374593
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,fp8,3,0.041493333876132965
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,float16,3,0.05007466673851013
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,float16,7,0.04979733129342397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,float16,15,0.05003733436266581
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,fp8,7,0.042378668983777366
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,fp8,15,0.041834667325019836
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,float16,31,0.05029866596062978
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,fp8,31,0.04182933270931244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,float16,63,0.05052266518274943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,fp8,63,0.04251199960708618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,float16,127,0.057317331433296204
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,fp8,127,0.047685335079828896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,float16,255,0.09035199880599976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,fp8,255,0.05914666752020518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,float16,511,0.16085333625475565
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,4,128,1,float16,fp8,511,0.09270399808883667
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,float16,1,0.08590400218963623
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,fp8,1,0.07473599910736084
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,float16,3,0.08482133348782857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,float16,7,0.08596799770991008
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,fp8,3,0.07443200051784515
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,fp8,7,0.07434133191903432
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,float16,15,0.0846560001373291
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,fp8,15,0.07444799939791362
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,float16,31,0.08478933572769165
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,fp8,31,0.07447466750939687
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,float16,63,0.0849120020866394
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,fp8,63,0.08085866769154866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,float16,127,0.09682666261990865
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,fp8,127,0.07660266757011414
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,float16,255,0.1609226663907369
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,fp8,255,0.09757333000500996
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,float16,511,0.3025226593017578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,8,8,128,1,float16,fp8,511,0.16310933232307434
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,float16,1,0.027797333896160126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,fp8,1,0.027141332626342773
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,float16,3,0.028384000062942505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,float16,7,0.02864533414443334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,fp8,7,0.0271519993742307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,float16,15,0.027434666951497395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,fp8,3,0.02610666553179423
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,fp8,31,0.025439999997615814
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,fp8,15,0.026629333694775898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,float16,31,0.029648000995318096
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,float16,63,0.02870933214823405
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,fp8,63,0.02554133286078771
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,float16,127,0.035461333890755974
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,fp8,127,0.027242665489514668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,float16,255,0.05193600058555603
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,float16,1,0.049866666396458946
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,1,128,1,float16,fp8,255,0.03589333345492681
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,fp8,1,0.04225599765777588
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,fp8,3,0.04165866722663244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,fp8,7,0.04165866722663244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,float16,7,0.05031999945640564
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,float16,15,0.04984533290068308
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,float16,3,0.05020800232887268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,fp8,15,0.041749333341916404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,float16,31,0.050383999943733215
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,fp8,31,0.04154133299986521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,fp8,63,0.041989331444104515
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,float16,63,0.05077333251635233
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,float16,127,0.05934933324654897
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,fp8,127,0.04769066472848257
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,float16,1,0.08940266569455464
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,float16,255,0.09127466877301534
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,2,128,1,float16,fp8,255,0.05821866790453593
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,fp8,1,0.07865599791208903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,float16,3,0.08877333005269368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,fp8,3,0.07921066880226135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,float16,7,0.09025599559148152
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,fp8,7,0.07901333272457123
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,float16,15,0.08905067046483357
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,fp8,31,0.07979199786980946
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,fp8,15,0.08044800162315369
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,float16,31,0.08912533521652222
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,float16,63,0.08901333808898926
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,fp8,63,0.07994666695594788
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,float16,127,0.10365866621335347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,fp8,127,0.0830026666323344
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,float16,255,0.16664533813794455
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,float16,1,0.15960533420244852
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,fp8,1,0.13760000467300415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,float16,3,0.15890666842460632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,4,128,1,float16,fp8,255,0.10336533188819885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,fp8,3,0.13723733027776083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,float16,7,0.15994667013486227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,fp8,7,0.13733333349227905
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,float16,15,0.15898666779200235
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,fp8,15,0.13820266723632812
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,float16,31,0.15988266468048096
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,fp8,31,0.13758400082588196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,float16,63,0.15845866998036703
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,float16,127,0.18095999956130981
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,fp8,63,0.1377120018005371
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,fp8,127,0.14243200421333313
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,1,0.008976000050703684
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,3,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,1,0.009525333220760027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,3,0.009530666594703993
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,7,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,7,0.009882666791478792
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,15,0.00943999985853831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,15,0.009722666814923286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,31,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,31,0.009594666461149851
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,63,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,63,0.009375999992092451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,127,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,127,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,255,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,255,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,511,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,1023,0.01062400018175443
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,511,0.011359999577204386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,1023,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,2047,0.01293333371480306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,2047,0.013130666067202887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,4095,0.0145066666106383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,4095,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,8191,0.015381333728631338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,8191,0.015098666151364645
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,16383,0.032032000521818794
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,16383,0.017498667041460674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,32767,0.051818668842315674
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,32767,0.03363733241955439
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,fp8,65535,0.052000001072883606
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,1,128,1,float16,float16,65535,0.08783466617266338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,float16,255,0.31008533636728924
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,1,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,1,0.011109333485364914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,3,0.009029333169261614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,3,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,7,0.009626666704813639
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,15,0.00943999985853831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,15,0.009375999992092451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,31,0.010602666685978571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,31,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,63,0.009904000287254652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,63,0.008954666554927826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,127,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,127,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,7,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,255,0.009946666657924652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,255,0.009253333633144697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,511,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,511,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,1023,0.011258666714032492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,1023,0.01119999960064888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,2047,0.0129120002190272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,4095,0.01471466695268949
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,4095,0.013088000317414602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,8191,0.030085332691669464
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,8191,0.015040000279744467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,16383,0.04930666585763296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,16383,0.03147733211517334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,32767,0.08544533451398213
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,65535,0.15887999534606934
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,65535,0.08656533559163411
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,1,0.008693333094318708
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,float16,2047,0.012842666357755661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,1,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,3,0.008943999807039896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,3,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,7,0.00943999985853831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,7,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,15,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,15,0.009839999799927076
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,2,128,1,float16,fp8,32767,0.050101334849993386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,31,0.008837333569924036
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,8,8,128,1,float16,fp8,255,0.18388799826304117
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,31,0.010661333799362183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,63,0.010634666929642359
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,127,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,255,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,255,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,511,0.011306667079528173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,511,0.011253333340088526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,127,0.009941333283980688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,63,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,1023,0.013013333082199097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,2047,0.012970666090647379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,2047,0.013424000392357508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,1023,0.011760000139474869
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,4095,0.030634666482607525
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,8191,0.04849599798520406
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,8191,0.032218667368094124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,16383,0.0846666693687439
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,16383,0.050101334849993386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,32767,0.08664000034332275
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,32767,0.15658133228619894
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,float16,65535,0.30049065748850506
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,1,0.01146666705608368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,65535,0.15917866428693137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,1,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,3,0.011120000233252844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,3,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,7,0.010485333700974783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,7,0.011215999722480774
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,15,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,4,128,1,float16,fp8,4095,0.01543466622630755
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,31,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,31,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,63,0.009648000200589498
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,63,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,127,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,127,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,255,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,255,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,511,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,511,0.011109333485364914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,1023,0.013440000514189402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,1023,0.013130666067202887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,2047,0.01526933287580808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,2047,0.029482667644818623
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,4095,0.04776533444722494
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,4095,0.030026666820049286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,8191,0.08330666522185008
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,15,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,16383,0.08390399813652039
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,32767,0.2999626596768697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,32767,0.15456533432006836
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,65535,0.5864106814066569
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,65535,0.29739199082056683
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,1,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,1,0.009008000294367472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,3,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,fp8,8191,0.04824000100294749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,3,0.008954666554927826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,7,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,7,0.009482666850090027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,15,0.00898133342464765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,15,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,31,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,31,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,63,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,63,0.010533332824707031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,127,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,127,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,255,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,255,0.008826666822036108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,511,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,511,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,1023,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,1023,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,2047,0.013199999928474426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,2047,0.011285333583752314
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,4095,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,4095,0.013061333447694778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,8191,0.015504000087579092
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,16383,0.04880533119042715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,16383,0.03175999969244003
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,32767,0.08649067083994548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,32767,0.05070933202902476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,65535,0.16025599837303162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,1,0.008949333180983862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,fp8,65535,0.08685333530108134
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,1,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,3,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,3,0.010591999938090643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,1,128,1,float16,float16,8191,0.029663999875386555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,7,0.009893333539366722
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,7,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,15,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,15,0.010981333752473196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,31,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,31,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,63,0.010650667051474253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,63,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,127,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,127,0.01062400018175443
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,255,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,255,0.009850666547815004
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,511,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,511,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,1023,0.011610666910807291
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,1023,0.011930666863918304
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,2047,0.013050666699806849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,2047,0.013264000415802002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,8,8,128,1,float16,float16,16383,0.1546239952246348
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,4095,0.030565333863099415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,4095,0.016751999656359356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,8191,0.048954665660858154
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,8191,0.03160000095764796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,16383,0.08618133266766866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,16383,0.0499839981396993
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,32767,0.08673600355784099
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,32767,0.16024532914161682
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,float16,65535,0.3058133323987325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,2,128,1,float16,fp8,65535,0.1605280041694641
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,1,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,3,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,7,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,3,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,7,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,15,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,15,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,31,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,31,0.011306667079528173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,63,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,63,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,127,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,127,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,255,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,255,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,511,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,1,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,1023,0.012778667112191519
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,2047,0.029279999434947968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,1023,0.012944000462690989
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,2047,0.015008000036080679
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,4095,0.047600001096725464
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,4095,0.031221332649389904
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,8191,0.08297599852085114
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,8191,0.048058668772379555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,16383,0.15506666898727417
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,16383,0.08462933699289958
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,32767,0.29874666531880695
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,32767,0.158053328593572
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,511,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,1,0.009296000003814697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,float16,65535,0.5859573284784952
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,3,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,1,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,4,4,128,1,float16,fp8,65535,0.3047093351682027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,3,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,7,0.010437333335479101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,7,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,15,0.00873066671192646
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,15,0.008842666943868002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,31,0.009450666606426239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,31,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,63,0.008826666822036108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,63,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,127,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,255,0.00874133345981439
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,255,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,511,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,511,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,1023,0.009466666728258133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,1023,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,2047,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,2047,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,4095,0.011418666690587997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,4095,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,8191,0.014954666296641031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,8191,0.01515199989080429
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,16383,0.018992000569899876
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,32767,0.025306666890780132
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,65535,0.029530666768550873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,65535,0.029338667790095013
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,131071,0.03126933425664902
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,131071,0.031498665610949196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,1,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,1,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,3,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,3,0.008805333326260248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,fp8,127,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,7,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,7,0.009253333633144697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,15,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,15,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,31,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,31,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,63,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,63,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,16383,0.0170666662355264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,127,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,127,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,255,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,255,0.009354666496316591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,511,0.012618667135636011
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,511,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,1023,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,2047,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,2047,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,4095,0.010672000547250112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,4095,0.011226666470368704
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,8191,0.015184000134468079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,8191,0.014805333067973455
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,16383,0.019120000302791595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,16383,0.019205333044131596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,32767,0.021344001094500225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,32767,0.021776000658671062
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,65535,0.023376000424226124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,65535,0.023152001202106476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,float16,131071,0.04154666761557261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,1023,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,2,128,1,float16,fp8,131071,0.02499733368555705
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,1,0.00901333304742972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,3,0.009589333087205887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,3,0.009253333633144697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,7,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,7,0.009402666861812273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,15,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,15,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,31,0.008885333314538002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,31,0.00960533320903778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,63,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,63,0.009637333452701569
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,127,0.009573333586255709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,127,0.009509333098928133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,255,0.00895999992887179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,255,0.009642666826645533
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,511,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,511,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,1,128,1,float16,float16,32767,0.02550933261712392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,1023,0.010191999996701876
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,2047,0.01062400018175443
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,1,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,4095,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,4095,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,8191,0.015184000134468079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,8191,0.015141333142916361
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,16383,0.017466666797796886
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,32767,0.019205333044131596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,32767,0.019258666783571243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,65535,0.038015998899936676
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,65535,0.021146667500336964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,131071,0.05470933516820272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,float16,1023,0.010640000303586325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,131071,0.038005332152048744
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,1,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,2047,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,1,0.0100426667680343
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,3,0.008746666833758354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,3,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,7,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,7,0.009413333609700203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,15,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,15,0.009306666751702627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,31,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,63,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,4,4,128,1,float16,fp8,16383,0.01692266638080279
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,127,0.009359999870260557
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,127,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,255,0.009322666873534521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,255,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,511,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,511,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,1023,0.010618666807810465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,1023,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,2047,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,2047,0.010650667051474253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,4095,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,4095,0.011231999844312668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,8191,0.014874666929244995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,8191,0.015850666910409927
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,16383,0.017423999806245167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,16383,0.01932799940307935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,32767,0.021141332884629566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,65535,0.02202133337656657
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,65535,0.021312000850836437
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,31,0.009008000294367472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,float16,131071,0.03951466580231985
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,131071,0.02537599951028824
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,63,0.00903466654320558
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,1,0.00927466650803884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,1,0.008896000062425932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,3,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,3,0.00985599992175897
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,7,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,7,0.009242666885256767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,15,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,31,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,31,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,63,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,63,0.009269333134094873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,127,0.009253333633144697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,127,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,255,0.009008000294367472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,255,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,511,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,511,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,1,128,1,float16,fp8,32767,0.021935999393463135
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,1023,0.010064000263810158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,1023,0.011039999624093374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,2047,0.0102613332370917
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,4095,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,2047,0.011109333485364914
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,4095,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,8191,0.015594666202863058
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,8191,0.015082667271296183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,15,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,16383,0.01738133281469345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,16383,0.017125333348910015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,32767,0.019120000302791595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,32767,0.019253333409627277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,65535,0.021312000850836437
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,fp8,131071,0.038058665891488395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,1,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,1,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,3,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,3,0.009578666960199675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,7,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,15,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,15,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,31,0.008943999807039896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,31,0.009338666374484697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,63,0.008933333059151968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,63,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,65535,0.03653866549332937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,127,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,127,0.008986666798591614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,2,128,1,float16,float16,131071,0.05712533493836721
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,255,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,255,0.008986666798591614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,511,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,511,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,1023,0.010357333347201347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,1023,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,2047,0.011183999478816986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,2047,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,4095,0.012970666090647379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,4095,0.013002666334311167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,8191,0.015295999745527903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,8191,0.015322666615247726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,16383,0.01710933322707812
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,16383,0.01681600014368693
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,32767,0.017450666675964992
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,32767,0.03366400053103765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,65535,0.05162666738033295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,65535,0.03549866626660029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,float16,131071,0.08790399630864461
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,1,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,1,0.011440000186363855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,3,0.009455999980370203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,7,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,7,0.011648000528415045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,3,0.010591999938090643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,7,0.009839999799927076
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,15,0.009957333405812582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,31,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,4,4,128,1,float16,fp8,131071,0.05266133447488149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,31,0.010677333921194077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,15,0.010677333921194077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,63,0.00890666681031386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,63,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,127,0.010618666807810465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,127,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,255,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,255,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,511,0.011221333096424738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,511,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,1023,0.012949333836634954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,1023,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,2047,0.013253333667914072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,2047,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,4095,0.015109332899252573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,8191,0.04941866795221964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,8191,0.03192000091075897
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,16383,0.04978133241335551
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,16383,0.08730133374532063
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,32767,0.1612106661001841
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,1,0.009759999811649323
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,fp8,32767,0.08679999907811482
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,1,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,3,0.010581333190202713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,3,0.010591999938090643
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,7,0.010159999753038088
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,7,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,15,0.010618666807810465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,1,128,1,float16,float16,4095,0.031008000175158184
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,15,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,31,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,31,0.010581333190202713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,63,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,63,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,127,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,127,0.010682666053374609
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,255,0.010485333700974783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,255,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,511,0.011434666812419891
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,511,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,1023,0.013082666943470636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,1023,0.012960000584522883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,2047,0.029461334149042766
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,2047,0.014965333044528961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,4095,0.048810665806134544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,4095,0.03012266755104065
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,8191,0.08477866649627686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,8191,0.048783997694651283
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,16383,0.15810666481653848
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,16383,0.08657067020734151
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,float16,32767,0.30535467465718585
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,1,0.011029332876205444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,2,128,1,float16,fp8,32767,0.15956266721089682
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,3,0.011231999844312668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,1,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,7,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,15,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,7,0.011952000359694162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,15,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,31,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,63,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,31,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,63,0.011173332730929056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,127,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,127,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,255,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,3,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,255,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,511,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,511,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,1023,0.029301332930723827
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,1023,0.015263999501864115
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,2047,0.04821866750717163
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,2047,0.03142400085926056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,4095,0.08468266328175862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,4095,0.050016000866889954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,8191,0.1562986671924591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,16383,0.300927996635437
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,16383,0.1588320036729177
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,1,0.008789333204428354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,1,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,3,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,float16,32767,0.5921333233515421
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,3,0.008805333326260248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,32767,0.3063093423843384
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,7,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,7,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,15,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,31,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,31,0.008762666955590248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,63,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,63,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,127,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,127,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,255,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,4,4,128,1,float16,fp8,8191,0.08657067020734151
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,255,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,511,0.010597333312034607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,1023,0.010656000425418219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,511,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,1023,0.010421333213647207
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,2047,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,4095,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,2047,0.01118933285276095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,4095,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,8191,0.014922666052977243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,16383,0.016938666502634685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,8191,0.015487999965747198
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,32767,0.019440000255902607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,32767,0.01939733326435089
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,65535,0.0364533339937528
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,65535,0.021375998854637146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,131071,0.054458667834599815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,1,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,131071,0.037861332297325134
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,float16,15,0.009232000137368837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,1,0.00903466654320558
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,3,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,3,0.009930666536092758
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,7,0.008816000074148178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,7,0.008816000074148178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,15,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,15,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,31,0.008805333326260248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,31,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,63,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,63,0.009583999713261923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,127,0.00898133342464765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,127,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,255,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,255,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,511,0.01119999960064888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,511,0.011066666493813196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,1023,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,1023,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,2047,0.010890666395425797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,1,128,1,float16,fp8,16383,0.01695999999841054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,2047,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,4095,0.013050666699806849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,8191,0.015311999867359797
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,8191,0.015087999403476715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,16383,0.017279999951521557
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,16383,0.016858667135238647
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,32767,0.017418666432301205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,32767,0.03409066547950109
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,65535,0.0524533341328303
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,65535,0.035429333647092186
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,131071,0.08851733803749084
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,fp8,131071,0.05442133545875549
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,1,0.00943999985853831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,1,0.009941333283980688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,3,0.009450666606426239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,3,0.010202666744589806
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,7,0.009461333354314169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,7,0.010064000263810158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,2,128,1,float16,float16,4095,0.013050666699806849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,15,0.009493333597977957
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,15,0.009877333417534828
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,31,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,31,0.010559999694426855
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,63,0.01003200002014637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,127,0.00895999992887179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,127,0.010784000158309937
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,255,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,511,0.011354666203260422
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,511,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,1023,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,1023,0.011418666690587997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,2047,0.01314666618903478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,2047,0.012975999464591345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,4095,0.013061333447694778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,4095,0.013754667093356451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,8191,0.015098666151364645
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,8191,0.014965333044528961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,16383,0.032111999889214836
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,16383,0.017653333644072216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,32767,0.050106664498647056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,32767,0.03443199892838796
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,63,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,65535,0.08538666367530823
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,131071,0.08784000078837077
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,131071,0.15609600146611533
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,float16,255,0.009722666814923286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,1,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,1,0.010634666929642359
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,3,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,3,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,7,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,7,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,4,4,128,1,float16,fp8,65535,0.052058666944503784
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,15,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,15,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,31,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,31,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,63,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,63,0.010575999816258749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,127,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,127,0.011551999797423681
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,255,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,255,0.010661333799362183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,511,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,511,0.011445333560307821
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,1023,0.012874666601419449
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,1023,0.01312000056107839
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,2047,0.029706666866938274
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,2047,0.015237333873907724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,4095,0.04868266483147939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,4095,0.031680000325044
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,8191,0.08550399541854858
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,8191,0.049957334995269775
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,float16,16383,0.16060266892115274
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,1,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,1,128,1,float16,fp8,16383,0.0869813362757365
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,1,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,3,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,7,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,15,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,7,0.011343999455372492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,15,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,31,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,31,0.011461333682139715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,63,0.01116266722480456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,3,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,63,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,127,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,127,0.011125333607196808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,255,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,511,0.013658666362365087
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,511,0.012938667088747025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,1023,0.029578665892283123
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,1023,0.014933332800865173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,2047,0.04901866614818573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,2047,0.03133866687615713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,255,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,4095,0.08608000477155049
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,4095,0.04994666576385498
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,8191,0.16031466921170553
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,8191,0.08795199791590373
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,float16,16383,0.3059733311335246
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,1,0.013365333278973898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,2,128,1,float16,fp8,16383,0.16115199526151022
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,3,0.014085333794355392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,3,0.013669333110253016
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,1,0.013167999684810638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,7,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,15,0.013855999956528345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,7,0.014042666802803675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,15,0.013066666821638743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,31,0.013834666460752487
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,31,0.01303999995191892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,63,0.014864000181357065
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,63,0.013178666432698568
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,127,0.014357333381970724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,127,0.012896000097195307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,255,0.014901333798964819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,255,0.01320533330241839
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,511,0.030213333666324615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,511,0.01699200024207433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,1023,0.04961599906285604
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,2047,0.08559999863306682
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,1023,0.03215466688076655
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,2047,0.049882665276527405
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,4095,0.08739200234413147
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,4095,0.15898666779200235
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,8191,0.16170133153597513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,8191,0.3052533268928528
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,fp8,16383,0.3094613353411357
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,1,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,4,4,128,1,float16,float16,16383,0.5989493529001871
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,1,0.011253333340088526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,3,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,3,0.011861333002646765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,7,0.011055999745925268
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,7,0.011285333583752314
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,15,0.011322667201360067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,15,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,31,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,31,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,63,0.011237333218256632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,63,0.011146667102972666
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,127,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,255,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,127,0.01156266654531161
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,255,0.011226666470368704
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,511,0.013797332843144735
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,511,0.013061333447694778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,1023,0.015392000476519266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,1023,0.03046400099992752
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,2047,0.04878933231035868
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,2047,0.03143999973932902
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,1,0.014778666198253632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,float16,4095,0.08740267157554626
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,1,0.013034666577974955
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,1,128,1,float16,fp8,4095,0.05061866839726766
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,3,0.015157333264748255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,3,0.013530666629473368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,7,0.014175999909639359
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,15,0.013093333691358566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,7,0.013002666334311167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,15,0.013151999562978745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,31,0.01403733342885971
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,31,0.013007999708255133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,63,0.014730667074521383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,63,0.01357866699496905
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,127,0.01431999976436297
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,127,0.013066666821638743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,255,0.014858666807413101
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,255,0.01340266689658165
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,511,0.029711998999118805
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,511,0.017184000462293625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,1023,0.04966933528582255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,1023,0.03249600032965342
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,2047,0.08730666836102803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,2047,0.0497920016447703
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,fp8,4095,0.08892800410588582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,2,128,1,float16,float16,4095,0.16129600008328757
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,1,0.017221332838137943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,1,0.017242666333913803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,3,0.019082666685183842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,3,0.017173333714405697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,7,0.0169813334941864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,15,0.018992000569899876
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,31,0.0182239996890227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,15,0.01720000058412552
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,31,0.01717866708834966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,7,0.01746133342385292
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,63,0.02035733312368393
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,63,0.017093333105246227
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,127,0.018906666586796444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,127,0.017136000096797943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,255,0.03011200080315272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,255,0.01746133342385292
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,511,0.0483893354733785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,511,0.03129599988460541
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,1023,0.0842026670773824
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,1023,0.04987200101216634
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,2047,0.08502399921417236
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,4095,0.2972799936930339
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,fp8,4095,0.1602186659971873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,4,4,128,1,float16,float16,2047,0.15495999654134116
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,float16,1,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,fp8,1,0.013061333447694778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,float16,3,0.014495999862750372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,float16,7,0.013066666821638743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,fp8,7,0.012837332983811697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,float16,15,0.015018666783968607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,fp8,15,0.012762666990359625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,float16,31,0.014165333161751429
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,fp8,31,0.013232000172138214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,fp8,3,0.013546666751305262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,float16,63,0.012885333349307379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,fp8,63,0.01313599944114685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,float16,127,0.01504533365368843
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,float16,255,0.014602666099866232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,fp8,127,0.013130666067202887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,fp8,255,0.012863999853531519
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,float16,511,0.031354665756225586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,fp8,511,0.016810666769742966
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,float16,1023,0.04975466430187225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,fp8,1023,0.03155199935038885
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,float16,2047,0.08806399504343669
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,1,128,1,float16,fp8,2047,0.051130667328834534
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,fp8,1,0.016997333616018295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,float16,1,0.019050666441520054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,float16,3,0.019018666197856266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,float16,7,0.018394666413466137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,fp8,3,0.017071999609470367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,fp8,7,0.0169813334941864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,float16,15,0.018735999862353008
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,fp8,15,0.0169813334941864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,float16,31,0.01921066641807556
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,fp8,31,0.017018667111794155
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,float16,63,0.019039999693632126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,fp8,63,0.017157333592573803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,float16,127,0.01907733331123988
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,fp8,127,0.01684800038735072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,float16,255,0.029813334345817566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,fp8,255,0.017024000485738117
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,float16,511,0.04839999973773956
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,fp8,511,0.03148266673088074
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,float16,1023,0.08558400472005208
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,fp8,1023,0.04957866668701172
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,float16,2047,0.15849066774050394
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,fp8,1,0.02517866591612498
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,float16,1,0.027952000498771667
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,float16,3,0.027722666660944622
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,fp8,3,0.02516266703605652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,float16,7,0.02757333219051361
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,2,128,1,float16,fp8,2047,0.08603733777999878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,fp8,7,0.025242666403452556
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,float16,15,0.027637332677841187
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,fp8,15,0.025477332373460133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,fp8,31,0.025616000096003216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,float16,63,0.027290667096773785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,fp8,63,0.025519999365011852
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,float16,31,0.028549333413441975
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,float16,127,0.033770665526390076
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,fp8,127,0.025311999022960663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,float16,255,0.050517335534095764
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,fp8,255,0.03545066714286804
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,float16,511,0.08587200442949931
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,fp8,511,0.05246399839719137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,float16,1023,0.1576746702194214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,fp8,1023,0.08964799841245015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,1,0.009482666850090027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,3,0.009568000212311745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,float16,2047,0.3000640074412028
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,3,0.009381333366036415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,7,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,15,0.009269333134094873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,7,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,15,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,4,4,128,1,float16,fp8,2047,0.15980266531308493
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,31,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,1,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,31,0.009312000125646591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,63,0.00897066667675972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,63,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,127,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,255,0.00879466657837232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,511,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,511,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,1023,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,1023,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,2047,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,2047,0.012341332932313284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,4095,0.013274667163689932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,4095,0.012837332983811697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,8191,0.015130666395028433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,8191,0.014848000059525171
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,16383,0.017551999539136887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,16383,0.01635733370979627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,32767,0.03401600072781245
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,32767,0.017653333644072216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,65535,0.053120002150535583
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,65535,0.03408533334732056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,131071,0.08874666690826416
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,127,0.010549332946538925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,float16,255,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,1,128,1,float16,fp8,131071,0.054117331902186074
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,1,0.0100426667680343
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,1,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,3,0.009493333597977957
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,7,0.008997333546479544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,3,0.009583999713261923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,7,0.009418666362762451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,15,0.00902399979531765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,31,0.009349333122372627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,31,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,63,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,63,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,127,0.009359999870260557
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,127,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,255,0.009423999736706415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,255,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,511,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,511,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,1023,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,1023,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,2047,0.013242666920026144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,4095,0.01414399966597557
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,2047,0.013877333452304205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,4095,0.01381333296497663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,8191,0.015274666249752045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,8191,0.014970666418472925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,16383,0.01720000058412552
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,32767,0.050661335388819374
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,32767,0.03346666693687439
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,15,0.00966933307548364
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,65535,0.05203733344872793
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,fp8,131071,0.08877866466840108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,131071,0.15941866238911948
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,1,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,1,0.010037333394090334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,3,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,3,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,7,0.008799999952316284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,7,0.010618666807810465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,15,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,15,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,31,0.008896000062425932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,31,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,16383,0.0330826664964358
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,63,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,63,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,127,0.009125333279371262
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,127,0.00955200009047985
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,2,128,1,float16,float16,65535,0.08673066894213359
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,255,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,255,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,511,0.010618666807810465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,511,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,1023,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,1023,0.01129066695769628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,2047,0.01303999995191892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,2047,0.01110400011142095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,4095,0.01498666654030482
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,4095,0.013023999830087027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,8191,0.029717333614826202
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,8191,0.015024000157912573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,16383,0.0488319993019104
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,16383,0.03159466634194056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,32767,0.04981866478919983
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,65535,0.15491732954978943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,65535,0.08694400389989217
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,131071,0.2993866602579753
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,fp8,131071,0.15849600235621134
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,4,4,128,1,float16,float16,32767,0.0841439962387085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,fp8,1,0.0170666662355264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,float16,3,0.01878400022784869
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,float16,1,0.018874666343132656
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,fp8,3,0.017184000462293625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,float16,7,0.019029332945744198
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,fp8,7,0.01709866647919019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,float16,15,0.018730666488409042
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,fp8,15,0.017397332936525345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,float16,31,0.01903466631968816
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,fp8,31,0.016938666502634685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,float16,63,0.019167999426523846
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,fp8,63,0.01720000058412552
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,float16,127,0.01905599981546402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,fp8,127,0.017114666601022083
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,float16,255,0.03029866764942805
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,fp8,255,0.017184000462293625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,float16,511,0.048997332652409874
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,fp8,511,0.03126933425664902
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,fp8,1023,0.050426666935284935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,float16,1,0.02754133443037669
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,1,128,1,float16,float16,1023,0.08611200253168742
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,fp8,1,0.025514667232831318
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,float16,3,0.02918400118748347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,fp8,3,0.025360000630219776
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,fp8,7,0.025413334369659424
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,float16,7,0.029029332101345062
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,float16,15,0.027285332481066387
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,fp8,15,0.02535466601451238
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,float16,31,0.028624000648657482
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,fp8,31,0.026144000391165417
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,fp8,63,0.02534399926662445
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,float16,63,0.028677334388097126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,float16,127,0.03496533383925756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,fp8,127,0.02646933247645696
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,float16,255,0.05089599887530009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,fp8,255,0.03513066718975703
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,float16,511,0.08711466193199158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,fp8,511,0.05296533306439718
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,float16,1023,0.15991999705632529
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,2,128,1,float16,fp8,1023,0.09050666292508443
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,float16,1,0.05053866902987162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,fp8,1,0.04201599955558777
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,float16,3,0.04993066688378652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,float16,7,0.050106664498647056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,fp8,3,0.04201066493988037
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,float16,15,0.049813335140546165
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,fp8,7,0.04187199970086416
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,fp8,15,0.04167999823888143
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,float16,31,0.050106664498647056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,fp8,31,0.04204800228277842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,float16,63,0.050426666935284935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,fp8,63,0.04168533285458883
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,float16,127,0.05860800047715505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,fp8,127,0.046666666865348816
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,float16,255,0.08868799606959026
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,fp8,255,0.05702400207519531
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,float16,511,0.1597813367843628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,fp8,511,0.09087999661763509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,fp8,1023,0.16356799999872842
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,4,4,128,1,float16,float16,1023,0.30135466655095416
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,float16,1,0.029450667401154835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,fp8,1,0.025429333249727886
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,float16,3,0.027456000447273254
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,float16,7,0.02940266579389572
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,fp8,7,0.02536533276240031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,fp8,3,0.025589334468046825
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,float16,15,0.027322667340437572
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,float16,31,0.029338667790095013
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,fp8,15,0.026426665484905243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,fp8,31,0.025445332129796345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,float16,63,0.028965334097544353
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,fp8,63,0.025248001019159954
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,float16,127,0.03573866685231527
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,fp8,127,0.02534399926662445
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,float16,255,0.05156800150871277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,fp8,255,0.03555733213822047
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,float16,511,0.08846400181452434
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,1,128,1,float16,fp8,511,0.053360000252723694
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,float16,1,0.05062933266162872
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,fp8,1,0.04200000067551931
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,float16,3,0.05050133168697357
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,fp8,3,0.04161600023508072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,fp8,7,0.04162666698296865
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,fp8,15,0.041536000867684685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,float16,15,0.050373335679372154
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,float16,7,0.050341332952181496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,float16,31,0.04976533353328705
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,float16,63,0.05020266771316528
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,fp8,31,0.04196799794832865
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,fp8,63,0.04208533465862274
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,float16,127,0.0583840012550354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,fp8,127,0.048250665267308555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,float16,255,0.09088533123334248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,float16,511,0.16423466801643372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,fp8,255,0.058890665570894875
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,2,128,1,float16,fp8,511,0.09365866581598918
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,float16,1,0.0888320008913676
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,fp8,1,0.07884266475836436
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,fp8,3,0.0798880010843277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,float16,7,0.08869333068529765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,fp8,7,0.07878399888674419
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,float16,3,0.0906986693541209
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,fp8,15,0.07911466558774312
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,float16,15,0.090037335952123
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,float16,31,0.08917333682378133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,float16,63,0.08876267075538635
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,fp8,63,0.08029333253701527
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,float16,127,0.10170132915178935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,fp8,31,0.07939200103282928
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,fp8,127,0.08291733264923096
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,float16,255,0.16554133097330728
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,1,0.009258666386206945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,1,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,3,0.009578666960199675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,3,0.009685333197315535
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,7,0.009541333342591921
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,7,0.009557333464423815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,15,0.009477333476146063
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,15,0.0102186668664217
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,31,0.009434666484594345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,31,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,63,0.009413333609700203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,fp8,255,0.10227732857068379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,63,0.009461333354314169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,127,0.009402666861812273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,255,0.009722666814923286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,255,0.009402666861812273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,127,0.010138666878143946
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,511,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,511,0.011274666835864386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,1023,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,1023,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,2047,0.013338666409254074
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,2047,0.012837332983811697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,4095,0.014901333798964819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,4095,0.014666666587193808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,8191,0.01695466662446658
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,8191,0.016154666741689045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,16383,0.018592000007629395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,16383,0.03346666693687439
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,32767,0.0510453333457311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,32767,0.033887999753157295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,65535,0.08796800176302592
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,65535,0.05239999790986379
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,float16,131071,0.16113066673278809
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,1,128,1,float16,fp8,131071,0.08990400036176045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,1,0.009338666374484697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,fp8,511,0.17012266318003336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,3,0.009429333110650381
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,1,0.009690666571259499
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,3,0.009296000003814697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,7,0.009477333476146063
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,15,0.009248000259200731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,15,0.00938666673998038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,31,0.009317333499590555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,31,0.009418666362762451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,63,0.009568000212311745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,63,0.009434666484594345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,127,0.009717333440979322
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,127,0.009450666606426239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,255,0.009338666374484697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,7,0.009301333377758661
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,255,0.010362666721145311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,511,0.011152000476916632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,511,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,1023,0.011173332730929056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,1023,0.011247999966144562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,2047,0.012784000486135483
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,2047,0.012346666306257248
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,4095,0.013093333691358566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,8191,0.03038399914900462
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,8191,0.015184000134468079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,16383,0.04919999837875366
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,16383,0.03165333221356074
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,32767,0.08673066894213359
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,32767,0.05037866532802582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,65535,0.15901333093643188
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,65535,0.0862559974193573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,4,4,128,1,float16,float16,511,0.3064746658007304
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,fp8,131071,0.15896532932917276
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,131071,0.3043253421783447
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,2,128,1,float16,float16,4095,0.015034666905800501
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,1,0.008837333569924036
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,3,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,3,0.009445333232482275
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,7,0.009050666665037474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,7,0.008874666566650072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,15,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,15,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,31,0.00890666681031386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,31,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,63,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,63,0.009818666925032934
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,127,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,127,0.009621333330869675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,255,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,255,0.010821333775917688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,511,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,511,0.011152000476916632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,1023,0.012858666479587555
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,1023,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,2047,0.013125333935022354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,2047,0.013104000439246496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,4095,0.029605334003766377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,8191,0.04854399959246317
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,8191,0.03141333411137263
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,16383,0.04971200227737427
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,32767,0.15571733315785727
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,32767,0.08566400408744812
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,65535,0.2990559935569763
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,1,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,4095,0.015184000134468079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,65535,0.15839999914169312
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,131071,0.5862666765848795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,1,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,float16,16383,0.08476266264915466
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,4,4,128,1,float16,fp8,131071,0.3041973312695821
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,3,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,3,0.010533332824707031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,1,0.009866666669646898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,7,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,7,0.010037333394090334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,15,0.009493333597977957
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,31,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,31,0.01032533310353756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,63,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,15,0.01027199998497963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,63,0.008757333581646284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,127,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,127,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,255,0.009445333232482275
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,255,0.009935999910036722
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,511,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,511,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,1023,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,1023,0.01129066695769628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,2047,0.012741333494583765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,2047,0.012586666891972223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,4095,0.014901333798964819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,8191,0.02959466725587845
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,4095,0.013621332744757334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,8191,0.01540800059835116
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,16383,0.04964800179004669
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,16383,0.03209600100914637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,32767,0.0865226686000824
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,32767,0.0498986691236496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,65535,0.08814400434494019
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,65535,0.16244799892107645
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,1,0.010293333480755487
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,float16,131071,0.307861328125
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,1,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,3,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,1,128,1,float16,fp8,131071,0.1606986622015635
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,3,0.008943999807039896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,7,0.011173332730929056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,7,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,15,0.009173333023985228
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,15,0.009594666461149851
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,31,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,31,0.010714666297038397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,63,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,63,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,127,0.011328000575304031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,127,0.010725333044926325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,255,0.009408000235756239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,255,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,511,0.011253333340088526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,511,0.011418666690587997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,1023,0.011098666737476984
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,1023,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,2047,0.015066667149464289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,2047,0.01328533391157786
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,4095,0.029968000948429108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,4095,0.015247999380032221
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,8191,0.049733335773150124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,16383,0.0860746701558431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,8191,0.03146133323510488
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,16383,0.04991999765237173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,32767,0.15917332967122397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,32767,0.0862613320350647
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,65535,0.15941866238911948
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,65535,0.3046773274739583
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,1,0.00897066667675972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,1,0.00978133330742518
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,fp8,131071,0.30615999301274616
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,3,0.009786666681369146
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,2,2,128,1,float16,float16,131071,0.6003520091374716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,7,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,15,0.009349333122372627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,15,0.00961599995692571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,31,0.009381333366036415
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,31,0.01071999967098236
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,63,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,63,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,127,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,127,0.010453333457310995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,255,0.009392000113924345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,255,0.01055466632048289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,511,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,1023,0.01055466632048289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,1023,0.010661333799362183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,2047,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,2047,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,4095,0.01119999960064888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,4095,0.011253333340088526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,7,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,8191,0.015392000476519266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,16383,0.01939733326435089
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,16383,0.019445333629846573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,32767,0.027162666122118633
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,32767,0.027562665442625683
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,511,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,65535,0.029605334003766377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,65535,0.029125332832336426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,131071,0.031311998764673867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,131071,0.029450667401154835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,1,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,3,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,3,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,float16,3,0.009232000137368837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,7,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,7,0.009450666606426239
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,15,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,15,0.009375999992092451
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,31,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,31,0.009557333464423815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,1,128,1,float16,fp8,8191,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,63,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,63,0.00921066664159298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,127,0.00884799969693025
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,127,0.009029333169261614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,255,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,255,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,511,0.009999999776482582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,511,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,1023,0.009813333551088968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,1023,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,2047,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,2047,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,4095,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,4095,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,8191,0.015509333461523056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,16383,0.01924266666173935
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,16383,0.019109333554903667
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,32767,0.021189334491888683
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,32767,0.021322667598724365
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,65535,0.023365333676338196
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,65535,0.02367999901374181
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,131071,0.04080000023047129
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,fp8,131071,0.02606400102376938
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,1,0.01032533310353756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,3,0.009888000165422758
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,3,0.01007466639081637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,1,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,7,0.009754666437705358
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,7,0.009343999748428663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,15,0.010255999863147736
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,15,0.00972800018886725
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,31,0.009898666913310686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,63,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,63,0.009648000200589498
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,127,0.0102186668664217
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,1,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,127,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,255,0.009189333145817121
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,255,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,511,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,511,0.011322667201360067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,1023,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,2,2,128,1,float16,float16,8191,0.01545599972208341
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,1023,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,2047,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,2047,0.010693332801262537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,4095,0.011525332927703857
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,8191,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,31,0.010079999764760336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,8191,0.01524266724785169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,16383,0.01912533367673556
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,32767,0.02145066608985265
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,32767,0.021274665991465252
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,65535,0.023520000278949738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,65535,0.02312533309062322
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,131071,0.02510400116443634
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,131071,0.04298133154710134
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,1,0.009429333110650381
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,1,0.010330666477481524
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,3,0.011605333536863327
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,3,0.009328000247478485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,7,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,7,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,15,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,15,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,31,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,31,0.010682666053374609
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,63,0.00922133338948091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,63,0.010762666662534079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,127,0.00891733355820179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,127,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,fp8,4095,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,255,0.009589333087205887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,511,0.01073066641887029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,1,128,1,float16,float16,16383,0.01923199991385142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,511,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,1023,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,2047,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,2047,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,4095,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,4095,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,8191,0.015429332852363586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,8191,0.014997333288192749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,16383,0.01720533271630605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,32767,0.01911466692884763
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,32767,0.019205333044131596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,65535,0.036943999429543815
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,65535,0.02110933264096578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,131071,0.05493866900602976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,255,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,131071,0.03821333249409994
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,fp8,1023,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,1,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,1,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,3,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,3,0.009573333586255709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,7,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,2,2,128,1,float16,float16,16383,0.017082666357358296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,7,0.009008000294367472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,15,0.00996800015370051
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,15,0.01003200002014637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,31,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,31,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,63,0.01027199998497963
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,63,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,127,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,127,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,255,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,255,0.008885333314538002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,511,0.011215999722480774
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,1023,0.011215999722480774
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,511,0.011365332951148352
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,1023,0.013013333082199097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,2047,0.013082666943470636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,2047,0.013104000439246496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,4095,0.031343999008337654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,8191,0.04868266483147939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,8191,0.03215466688076655
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,4095,0.0161920003592968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,16383,0.08715200424194336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,16383,0.04980266590913137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,32767,0.16048533717791238
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,32767,0.08776533603668213
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,1,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,fp8,65535,0.1599573294321696
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,1,128,1,float16,float16,65535,0.30751999219258624
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,3,0.010847999403874079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,3,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,1,0.010213333492477735
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,7,0.010474666953086853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,7,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,15,0.011018666128317514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,31,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,15,0.010453333457310995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,31,0.010703999549150467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,63,0.010954666882753372
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,63,0.010538666198650995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,127,0.011898666620254517
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,127,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,255,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,255,0.010629333555698395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,511,0.011071999867757162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,511,0.011168000598748526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,1023,0.013013333082199097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,2047,0.029578665892283123
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,4095,0.04818133513132731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,2047,0.014959999670584997
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,4095,0.03071466585000356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,8191,0.0851200024286906
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,8191,0.04934933284918467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,16383,0.08513066172599792
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,16383,0.15872533122698465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,1023,0.012831999609867731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,32767,0.3039413293202718
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,32767,0.15956266721089682
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,1,0.009306666751702627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,1,0.009103999783595404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,3,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,float16,65535,0.5964746475219727
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,3,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,7,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,15,0.009226666763424873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,15,0.009626666704813639
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,7,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,31,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,2,2,128,1,float16,fp8,65535,0.3054506580034892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,31,0.010602666685978571
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,63,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,63,0.00926399976015091
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,127,0.010538666198650995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,255,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,127,0.008901333436369896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,511,0.010842667271693548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,511,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,1023,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,1023,0.011285333583752314
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,2047,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,2047,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,4095,0.011130666981140772
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,4095,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,8191,0.015082667271296183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,8191,0.014890667051076889
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,16383,0.01693333312869072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,32767,0.01926933353145917
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,32767,0.019178666174411774
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,65535,0.0367999995748202
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,131071,0.054832001527150474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,131071,0.03818133225043615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,1,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,1,0.009514666472872099
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,3,0.00902399979531765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,255,0.009237333511312803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,3,0.00938666673998038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,7,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,7,0.00943999985853831
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,15,0.009114666531483332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,15,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,31,0.009365333244204521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,31,0.009770666559537252
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,63,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,float16,16383,0.017466666797796886
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,63,0.010277333358923594
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,127,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,127,0.00979200005531311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,255,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,1,128,1,float16,fp8,65535,0.02075733368595441
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,255,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,511,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,511,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,1023,0.009984000275532404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,1023,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,2047,0.010656000425418219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,2047,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,4095,0.01314666618903478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,4095,0.013450667262077332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,8191,0.015018666783968607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,8191,0.015087999403476715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,16383,0.017077332983414333
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,16383,0.017125333348910015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,32767,0.03344533344109853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,32767,0.017466666797796886
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,65535,0.05172266562779745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,65535,0.03570133447647095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,float16,131071,0.08669333656628926
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,2,2,128,1,float16,fp8,131071,0.0545653353134791
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,1,0.011002667248249054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,1,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,3,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,7,0.010837333897749582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,7,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,15,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,31,0.010778666784365972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,31,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,63,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,15,0.011823999385039011
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,3,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,127,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,127,0.010901333143313726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,255,0.010698666175206503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,255,0.010741333166758219
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,511,0.011050666371981302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,511,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,1023,0.012906666845083237
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,63,0.01089599976936976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,1023,0.012810666114091873
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,2047,0.030293333033720653
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,2047,0.014954666296641031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,4095,0.04860266546408335
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,4095,0.03126399964094162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,8191,0.049456000328063965
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,8191,0.0853760043780009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,16383,0.08714133501052856
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,16383,0.16025599837303162
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,float16,32767,0.3072320024172465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,1,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,1,128,1,float16,fp8,32767,0.1591253379980723
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,1,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,3,0.011226666470368704
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,3,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,7,0.010928000013033548
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,7,0.01118933285276095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,15,0.011141333729028702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,31,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,15,0.01108266661564509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,31,0.011141333729028702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,63,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,63,0.01099733387430509
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,127,0.012128000458081564
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,127,0.011173332730929056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,255,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,255,0.010858666151762009
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,511,0.014416000495354334
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,511,0.013194666554530462
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,1023,0.02961066613594691
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,1023,0.014783999572197596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,2047,0.04837866624196371
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,4095,0.08683199683825175
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,2047,0.0314026673634847
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,4095,0.04941866795221964
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,8191,0.15961066881815592
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,8191,0.08825066685676575
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,16383,0.16154133280118307
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,16383,0.3076266646385193
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,fp8,32767,0.3112746675809224
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,1,0.01109333336353302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,2,2,128,1,float16,float16,32767,0.6006720066070557
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,1,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,3,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,3,0.011061333119869232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,7,0.011343999455372492
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,7,0.01102399950226148
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,15,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,15,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,31,0.011136000355084738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,31,0.011429333438475927
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,63,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,63,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,127,0.011642667154471079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,127,0.011370666325092316
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,255,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,255,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,511,0.013306666165590286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,511,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,1023,0.030293333033720653
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,1023,0.015290666371583939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,2047,0.03180266668399175
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,2047,0.04966933528582255
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,4095,0.08682133754094441
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,4095,0.04985066751639048
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,float16,8191,0.1602026621500651
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,1,128,1,float16,fp8,8191,0.08810133735338847
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,3,0.013023999830087027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,1,0.015034666905800501
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,1,0.01312000056107839
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,3,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,7,0.013002666334311167
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,15,0.013194666554530462
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,7,0.013306666165590286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,15,0.013280000537633896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,31,0.013157332936922709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,31,0.012863999853531519
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,63,0.014938666174809137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,63,0.01312000056107839
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,127,0.012997332960367203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,127,0.013359999905029932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,255,0.01414399966597557
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,255,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,511,0.030415999392668407
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,511,0.01710933322707812
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,1023,0.04951466619968414
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,1023,0.03196266790231069
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,2047,0.08676266670227051
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,2047,0.05100266635417938
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,4095,0.16101866960525513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,4095,0.08822932839393616
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,float16,8191,0.3102826674779256
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,2,2,128,1,float16,fp8,8191,0.1604586640993754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,1,0.014831999937693277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,1,0.01322666679819425
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,3,0.013989333063364029
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,3,0.012975999464591345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,7,0.015189333508412043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,7,0.013647999614477158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,15,0.014015999933083853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,15,0.01309866706530253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,31,0.014864000181357065
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,31,0.01321600005030632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,63,0.013487999637921652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,63,0.01309866706530253
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,127,0.014954666296641031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,127,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,255,0.013642666240533194
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,255,0.013370666652917862
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,511,0.03143466760714849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,511,0.017184000462293625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,1023,0.049584001302719116
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,1023,0.03216533362865448
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,2047,0.08830400307973225
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,2047,0.05051200091838837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,float16,4095,0.16166933377583823
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,1,0.01941866676012675
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,1,128,1,float16,fp8,4095,0.08777067065238953
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,1,0.017162666966517765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,3,0.01736533393462499
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,3,0.01812800019979477
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,7,0.018944000204404194
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,7,0.017360000560681026
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,15,0.01916266605257988
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,15,0.016949333250522614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,31,0.019018666197856266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,63,0.019152000546455383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,63,0.01708799973130226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,31,0.01732800031701724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,127,0.01903466631968816
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,127,0.01708799973130226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,255,0.029968000948429108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,255,0.017008000363906223
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,511,0.04901866614818573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,511,0.03143466760714849
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,1023,0.08558932940165202
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,1023,0.049786667029062905
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,2047,0.15829867124557495
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,1,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,1,0.009285333255926767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,3,0.00901333304742972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,2047,0.08617599805196126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,float16,4095,0.3015146652857463
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,3,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,15,0.008912000184257826
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,7,0.008816000074148178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,15,0.008992000172535578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,31,0.008799999952316284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,31,0.009509333098928133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,63,0.009098666409651438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,63,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,7,0.009077333534757296
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,127,0.008858666444818178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,127,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,255,0.00927466650803884
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,255,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,511,0.010794666906197866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,511,0.010703999549150467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,1023,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,1023,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,2047,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,2047,0.011215999722480774
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,4095,0.013077333569526672
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,2,2,128,1,float16,fp8,4095,0.16065067052841187
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,8191,0.014853333433469137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,16383,0.01695466662446658
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,8191,0.015466666469971338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,16383,0.01524266724785169
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,32767,0.03417599946260452
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,65535,0.05312533179918925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,65535,0.03551999976237615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,float16,131071,0.08930133779843648
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,1,0.009839999799927076
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,131071,0.054469332098960876
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,1,0.01080000028014183
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,4095,0.013173333058754602
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,3,0.009813333551088968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,3,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,7,0.009413333609700203
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,7,0.01003200002014637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,15,0.00979200005531311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,15,0.009349333122372627
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,31,0.009359999870260557
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,31,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,1,128,1,float16,fp8,32767,0.01725333308180173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,63,0.009472000102202097
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,63,0.009530666594703993
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,127,0.010581333190202713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,127,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,255,0.009322666873534521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,255,0.009610666582981745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,511,0.011077333241701126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,511,0.01071999967098236
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,1023,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,2047,0.012874666601419449
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,2047,0.012629333883523941
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,4095,0.01301866645614306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,4095,0.014752000570297241
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,8191,0.015541333705186844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,8191,0.014933332800865173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,16383,0.03186133255561193
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,16383,0.01740266631046931
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,32767,0.050250664353370667
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,32767,0.03372266640265783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,65535,0.08726400136947632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,65535,0.05184000233809153
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,131071,0.15972266594568887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,fp8,131071,0.08859733740488689
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,2,2,128,1,float16,float16,1023,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,float16,1,0.018981333822011948
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,fp8,1,0.017050666113694508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,float16,3,0.01903466631968816
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,fp8,3,0.01724799970785777
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,float16,7,0.018735999862353008
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,fp8,7,0.01722666621208191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,float16,15,0.019215999792019527
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,fp8,15,0.01710933322707812
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,float16,31,0.018965333700180054
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,fp8,31,0.017050666113694508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,float16,63,0.019189332922299702
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,fp8,63,0.017231999586025875
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,float16,127,0.018976000448067982
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,fp8,127,0.017024000485738117
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,float16,255,0.030117332935333252
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,fp8,255,0.01754666616519292
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,float16,511,0.04863466819127401
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,fp8,511,0.03173333406448364
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,float16,1023,0.08637866377830505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,fp8,1023,0.05072000126043955
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,float16,2047,0.15873066584269205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,1,128,1,float16,fp8,2047,0.0867199997107188
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,fp8,1,0.02720000098148982
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,float16,1,0.02918400118748347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,float16,3,0.02762666592995326
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,fp8,3,0.025519999365011852
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,float16,7,0.02942933390537898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,fp8,7,0.025381334125995636
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,float16,15,0.028255999088287354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,fp8,15,0.02536533276240031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,float16,31,0.029258665939172108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,fp8,31,0.02550400048494339
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,float16,63,0.02756800005833308
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,fp8,63,0.02532800038655599
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,float16,127,0.03547733277082443
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,fp8,127,0.025792000194390614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,float16,255,0.05089066425959269
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,fp8,255,0.035562666753927864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,float16,511,0.08739733695983887
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,fp8,511,0.052789335449536644
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,float16,1023,0.16014933586120605
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,fp8,1023,0.08971200386683147
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,float16,2047,0.3052266637484233
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,2,2,128,1,float16,fp8,2047,0.1630400021870931
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,float16,1,0.027647999425729115
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,fp8,1,0.02537599951028824
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,float16,3,0.02808533360560735
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,float16,7,0.02790933350721995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,fp8,7,0.02531733363866806
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,fp8,3,0.02638400097688039
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,float16,15,0.02788266787926356
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,fp8,15,0.025311999022960663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,float16,31,0.027514666318893433
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,fp8,31,0.02624000112215678
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,float16,63,0.028463999430338543
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,fp8,63,0.025226667523384094
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,float16,127,0.03533866753180822
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,fp8,127,0.026159999271233875
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,float16,255,0.05092266698678335
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,fp8,255,0.035461333890755974
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,float16,511,0.08807999889055888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,float16,1023,0.1609546641508738
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,fp8,511,0.05310399830341339
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,1,128,1,float16,fp8,1023,0.08948266506195068
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,float16,1,0.049957334995269775
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,fp8,1,0.04215466479460398
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,float16,7,0.04995200037956238
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,fp8,3,0.04205333193143209
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,float16,3,0.05091733237107595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,float16,15,0.049914668003718056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,fp8,7,0.04200533529122671
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,fp8,15,0.041637333730856575
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,fp8,31,0.041493333876132965
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,float16,63,0.050288001696268715
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,fp8,63,0.041840001940727234
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,float16,127,0.057850668827692665
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,fp8,127,0.047728002071380615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,float16,255,0.08950933814048767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,float16,31,0.05110399921735128
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,fp8,255,0.057573333382606506
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,fp8,511,0.09179733196894328
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,float16,511,0.16209066907564798
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,1,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,1,0.009765333185593287
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,3,0.008821333448092142
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,3,0.010709332923094431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,7,0.008832000195980072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,7,0.010575999816258749
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,15,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,15,0.010005333150426546
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,31,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,31,0.009242666885256767
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,63,0.00898133342464765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,63,0.009232000137368837
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,127,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,127,0.009402666861812273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,255,0.008976000050703684
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,255,0.009162666896979014
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,511,0.010879999647537867
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,511,0.010826667149861654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,1023,0.01081066702802976
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,1023,0.01055466632048289
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,2047,0.012975999464591345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,2047,0.012853333105643591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,4095,0.014325333138306936
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,4095,0.013280000537633896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,8191,0.015498666713635126
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,8191,0.015178666760524115
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,16383,0.03204799940188726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,16383,0.017525333911180496
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,32767,0.050714666644732155
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,32767,0.03384000062942505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,65535,0.08756267031033833
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,65535,0.052229334910710655
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,float16,131071,0.15996266404787698
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,1,128,1,float16,fp8,131071,0.09102400143941243
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,1,0.008810666700204214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,1,0.009061333412925402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,3,0.008874666566650072
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,3,0.009392000113924345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,7,0.008778666456540426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,7,0.009248000259200731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,15,0.009088000282645226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,15,0.010618666807810465
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,31,0.009136000027259191
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,63,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,63,0.009935999910036722
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,127,0.008826666822036108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,127,0.00983466642598311
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,255,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,255,0.009706666693091393
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,511,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,fp8,1023,0.1644213298956553
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,511,0.010863999525705973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,31,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,1023,0.010751999914646149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,2047,0.010853332777818045
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,2047,0.013151999562978745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,4095,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,8191,0.030623999734719593
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,8191,0.01653333380818367
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,16383,0.04905066887537638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,1023,0.011125333607196808
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,16383,0.031925333042939506
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,32767,0.08588266372680664
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,32767,0.049584001302719116
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,65535,0.15959999958674112
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,2,2,128,1,float16,float16,1023,0.3060693343480428
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,65535,0.0865280032157898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,4095,0.01340266689658165
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,float16,131071,0.30457067489624023
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,1,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,1,0.009183999771873156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,3,0.00901333304742972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,3,0.009018666421373686
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,7,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,7,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,15,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,15,0.00898133342464765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,31,0.009045333291093508
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,31,0.00897066667675972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,63,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,63,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,127,0.01002133327225844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,127,0.01028266673286756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,2,2,128,1,float16,fp8,131071,0.160480002562205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,255,0.009082666908701261
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,255,0.009872000043590864
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,1023,0.011253333340088526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,511,0.011285333583752314
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,511,0.01098666712641716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,1023,0.010874666273593903
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,2047,0.012847999731699625
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,2047,0.013349333157142004
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,4095,0.01303999995191892
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,4095,0.015450666348139444
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,8191,0.0296426663796107
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,16383,0.04933333396911621
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,32767,0.08674666285514832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,16383,0.03183999905983607
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,32767,0.04987200101216634
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,65535,0.1609173317750295
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,65535,0.08847467104593913
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,1,0.009178666397929192
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,1,0.009690666571259499
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,float16,131071,0.30793599287668866
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,3,0.009434666484594345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,131071,0.1609493295351664
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,7,0.010565333068370819
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,3,0.008943999807039896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,7,0.008826666822036108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,15,0.010464000205198923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,15,0.00916800027092298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,31,0.008879999940594038
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,31,0.010992000500361124
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,63,0.008997333546479544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,63,0.00879466657837232
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,127,0.010048000141978264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,127,0.009365333244204521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,255,0.008885333314538002
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,511,0.011087999989589056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,32,1,1,1,128,1,float16,fp8,8191,0.017157333592573803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,1023,0.010640000303586325
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,1023,0.010570666442314783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,2047,0.010911999891201654
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,2047,0.010682666053374609
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,4095,0.010965333630641302
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,4095,0.01071999967098236
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,8191,0.015119999647140503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,8191,0.014869333555301031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,16383,0.01905599981546402
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,32767,0.025221332907676697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,32767,0.025546667476495106
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,65535,0.029525332152843475
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,65535,0.02977599948644638
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,float16,131071,0.03141333411137263
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,131071,0.031583999594052635
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,1,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,1,0.009119999905427298
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,255,0.010527999450763067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,3,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,3,0.010496000448862711
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,7,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,7,0.010608000059922537
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,15,0.008901333436369896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,15,0.01071999967098236
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,31,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,31,0.009343999748428663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,63,0.009109333157539368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,16383,0.01911466692884763
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,63,0.009248000259200731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,127,0.008837333569924036
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,127,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,255,0.009066666786869368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,511,0.010298666854699453
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,1023,0.009296000003814697
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,511,0.011007999380429586
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,1023,0.010298666854699453
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,2047,0.010751999914646149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,2047,0.010703999549150467
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,4095,0.01118933285276095
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,4095,0.011226666470368704
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,8191,0.015200000256299973
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,8191,0.015253332753976187
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,16383,0.01937599976857503
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,16383,0.0194560003777345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1,1,1,1,128,1,float16,fp8,511,0.010773333410422007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,32767,0.02149333308140437
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,32767,0.021402666966120403
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,65535,0.02310933421055476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,65535,0.023120000958442688
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,float16,131071,0.04035199930270513
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,131071,0.02533866713444392
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,1,0.009328000247478485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,1,0.009919999788204828
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2,1,1,1,128,1,float16,fp8,255,0.009072000160813332
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,3,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,3,0.009402666861812273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,7,0.009279999881982803
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,7,0.01007466639081637
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,15,0.009957333405812582
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,15,0.00890666681031386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,31,0.009093333035707474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,31,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,63,0.00960533320903778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,63,0.009653333574533463
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,127,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,127,0.010944000134865442
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,255,0.009445333232482275
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,255,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,511,0.011231999844312668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,511,0.011045332998037338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,1023,0.01321600005030632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,1023,0.012106666962305704
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,2047,0.013007999708255133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,2047,0.013631999492645264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,4095,0.01534933348496755
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,4095,0.02993600070476532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,8191,0.04980266590913137
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,16383,0.086709330479304
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,16383,0.04957866668701172
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,8191,0.032586666444937386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,32767,0.16076800227165222
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,32767,0.08681066830952962
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,65535,0.30770667394002277
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,65535,0.16321067015329996
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,1,0.008789333204428354
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,3,0.008757333581646284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,3,0.008799999952316284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,1,0.00980266680320104
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,7,0.008778666456540426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,7,0.009573333586255709
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,15,0.00892800030608972
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,15,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,31,0.00891733355820179
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,31,0.009610666582981745
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,63,0.009824000298976898
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,127,0.010010666524370512
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,127,0.010960000256697336
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,255,0.010357333347201347
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,255,0.010474666953086853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,float16,131071,0.6039199829101562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,64,1,1,1,128,1,float16,fp8,131071,0.3083253304163615
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,511,0.010757333288590113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,1023,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,1023,0.009525333220760027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,511,0.010869332899649939
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,2047,0.010687999427318573
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,2047,0.0107893335322539
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,4095,0.010922666639089584
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,4095,0.011173332730929056
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,8191,0.014896000425020853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,8191,0.014991999914248785
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,16383,0.01736533393462499
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,16383,0.017018667111794155
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,32767,0.019306667149066925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,63,0.008778666456540426
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,32767,0.019194666296243668
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,65535,0.03705599904060364
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,65535,0.019802667200565338
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,float16,131071,0.05605866511662801
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,1,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,1,0.010549332946538925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,3,0.010709332923094431
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,3,0.010746666540702185
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,7,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,7,0.010629333555698395
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,15,0.010549332946538925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,15,0.010581333190202713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,4,1,1,1,128,1,float16,fp8,131071,0.0383146678407987
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,31,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,31,0.010816000401973724
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,63,0.01091733326514562
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,63,0.010570666442314783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,127,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,127,0.010768000036478043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,255,0.010527999450763067
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,511,0.01090666651725769
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,511,0.01097600037852923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,1023,0.013061333447694778
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,1023,0.012485332787036896
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,2047,0.029301332930723827
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,2047,0.015184000134468079
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,4095,0.04833066463470459
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,4095,0.030906667311986286
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,255,0.010751999914646149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,8191,0.0863200028737386
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,8191,0.04952000081539154
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,16383,0.0867733359336853
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,16383,0.16056000192960104
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,32767,0.3078666726748149
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,32767,0.16134933630625406
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,float16,65535,0.6024479866027832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,128,1,1,1,128,1,float16,fp8,65535,0.30822400252024335
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,1,0.012533333152532578
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,3,0.011120000233252844
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,3,0.01119999960064888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,1,0.011594666788975397
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,7,0.01139733319481214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,7,0.011349332829316458
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,15,0.011333333949247995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,15,0.011605333536863327
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,31,0.011157333850860596
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,31,0.010933333386977514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,63,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,63,0.011264000087976456
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,127,0.01163200040658315
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,127,0.011306667079528173
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,255,0.011253333340088526
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,255,0.011194666226704916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,511,0.013274667163689932
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,1023,0.03102933367093404
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,1023,0.015189333508412043
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,2047,0.04940799872080485
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,511,0.01301866645614306
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,2047,0.031317333380381264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,4095,0.08602666854858398
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,8191,0.16076266765594482
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,8191,0.0883840024471283
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,4095,0.050144001841545105
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,fp8,16383,0.16293332974116007
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,256,1,1,1,128,1,float16,float16,16383,0.3088746666908264
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,1,0.014906667172908783
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,1,0.013386666774749756
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,3,0.012901333471139273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,7,0.013957332819700241
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,3,0.013066666821638743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,7,0.013381333400805792
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,15,0.013487999637921652
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,15,0.013317332913478216
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,31,0.014874666929244995
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,31,0.013301332791646322
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,63,0.01310933381319046
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,63,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,127,0.01314666618903478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,127,0.013237333546082178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,255,0.013466666142145792
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,255,0.013183999806642532
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,511,0.029717333614826202
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,511,0.016938666502634685
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,1023,0.048800001541773476
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,1023,0.032042667269706726
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,2047,0.08680533369382222
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,2047,0.05136000116666158
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,4095,0.1612160007158915
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,1,0.00919999989370505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,1,0.009248000259200731
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,4095,0.08761599659919739
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,float16,8191,0.31090666850407916
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,3,0.008869333192706108
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,7,0.008863999818762144
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,15,0.008922666932145754
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,7,0.009205333267649015
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,15,0.00902399979531765
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,31,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,31,0.008810666700204214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,63,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,3,0.009194666519761086
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,63,0.009029333169261614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,127,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,127,0.009354666496316591
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,255,0.008853333070874214
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,255,0.00915733352303505
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,511,0.011114666859308878
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,511,0.01101333275437355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,1023,0.010970667004585266
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,1023,0.01119999960064888
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,2047,0.010805333654085795
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,4095,0.01321600005030632
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,4095,0.012906666845083237
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,512,1,1,1,128,1,float16,fp8,8191,0.16192000110944113
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,8191,0.015002666662136713
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,8191,0.014869333555301031
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,16383,0.01773333301146825
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,32767,0.03339733431736628
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,16383,0.01706133286158244
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,65535,0.052629331747690834
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,65535,0.035018667578697205
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,float16,131071,0.08919466535250346
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,131071,0.053861334919929504
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,2047,0.011034666250149408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,8,1,1,1,128,1,float16,fp8,32767,0.019109333554903667
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,1,0.018986667195955913
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,1,0.017322666943073273
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,3,0.018992000569899876
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,3,0.017386666188637417
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,15,0.019061333189407986
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,7,0.017429333180189133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,15,0.017429333180189133
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,7,0.019199999670187633
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,31,0.01899733394384384
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,31,0.017488000293572743
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,63,0.019152000546455383
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,63,0.017055999487638474
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,127,0.01738133281469345
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,127,0.019306667149066925
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,255,0.030159999926884968
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,255,0.01695466662446658
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,511,0.04851733148097992
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,511,0.0315786674618721
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,1023,0.0846560001373291
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,1023,0.04956800242265066
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,2047,0.15798933307329813
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,2047,0.08608532945315044
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,float16,4095,0.3038133382797241
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,1024,1,1,1,128,1,float16,fp8,4095,0.15920533736546835
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,float16,1,0.028437333802382152
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,fp8,1,0.026767998933792114
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,float16,3,0.029690665503342945
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,fp8,3,0.02548266698916753
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,float16,7,0.028223998844623566
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,float16,15,0.02734400083621343
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,fp8,15,0.025498665869235992
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,fp8,7,0.026261332134405773
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,float16,31,0.027855999767780304
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,fp8,31,0.025263999899228413
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,float16,63,0.0276853342851003
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,fp8,63,0.02651199946800868
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,float16,127,0.03493333359559377
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,fp8,127,0.025536000728607178
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,float16,255,0.05022400120894114
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,fp8,255,0.03566933423280716
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,float16,511,0.08819733063379924
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,fp8,511,0.052255998055140175
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,float16,1023,0.15967999895413718
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,1,0.009130666653315226
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,1,0.009343999748428663
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,3,0.009152000149091085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,3,0.009695999945203463
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,7,0.009008000294367472
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,7,0.009365333244204521
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,15,0.009141333401203156
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,15,0.009039999917149544
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,fp8,1023,0.08877333005269368
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,31,0.009216000015536943
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,31,0.009525333220760027
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,63,0.009599999835093817
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,127,0.008799999952316284
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,127,0.009583999713261923
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,255,0.00914666677514712
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,63,0.009519999846816063
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,255,0.009056000038981438
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,511,0.010885333021481832
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,511,0.010832000523805618
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,1023,0.010938666760921478
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,1023,0.010949333508809408
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,2047,0.01302933320403099
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,2047,0.0129120002190272
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,4095,0.014965333044528961
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,4095,0.012944000462690989
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,float16,2047,0.3062933286031087
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,8191,0.01590399940808614
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,8191,0.01498666654030482
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,32767,0.051072001457214355
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,16383,0.03270933280388514
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,32767,0.03370666752258936
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,65535,0.0883893370628357
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,65535,0.05195199946562449
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,131071,0.08868799606959026
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,float16,131071,0.16110933820406595
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,16,1,1,1,128,1,float16,fp8,16383,0.01747200017174085
VLLM,0.14.1.dev1+gd68209402,NVIDIA GB300,generation_attention,vllm_flashinfer,2048,1,1,1,128,1,float16,fp8,2047,0.16168000300725302
