framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,1,1,0,0.14612799882888794
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,0,0.10179733236630757
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,1,16,0,0.20114666223526
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,0,0.11974400281906128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,1,32,0,0.20359466473261514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,0,0.22272533178329468
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,1,64,0,0.20564266045888266
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,0,0.1578986644744873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,1,128,0,0.1985973318417867
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,0,0.10011733571688335
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,1,1,0,0.12371200323104858
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,0,0.10797866185506184
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,1,2,0,0.20645866791407266
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,0,0.11532266934712727
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,1,4,0,0.19318399826685587
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,0,0.14659200112024942
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,1,8,0,0.20794665813446045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,0,0.1504693329334259
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,0,0.12658666570981345
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,1,16,0,0.1600320041179657
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,0,0.12289067109425862
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,1,32,0,0.16757333278656006
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,0,0.13058666388193765
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,1,64,0,0.20543466011683145
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,0,0.12384000420570374
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,1,128,0,0.18872000773747763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,0,0.09690133730570476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,1,1,0,0.16790932416915894
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,0,0.11953066786130269
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,0,0.1606773336728414
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,1,2,0,0.2139306664466858
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,0,0.09762133161226909
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,1,2,0,0.27185599009195965
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,0,0.1567626694838206
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,1,4,0,0.21372799078623453
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,1,8,0,0.20082666476567587
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,0,0.1553759972254435
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,0,0.1601706643899282
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,1,16,0,0.201690673828125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,1,32,0,0.209114670753479
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,0,0.15610667069753012
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,0,0.15361600120862326
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,0,0.12328533331553142
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,1,64,0,0.20940266052881876
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,1,128,0,0.14450132846832275
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,0,0.16827199856440225
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,0,0.1583626667658488
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,1,1,0,0.19762667020161948
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,1,2,0,0.2097973426183065
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,0,0.16040533781051636
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,1,4,0,0.16857065757115683
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,0,0.12367467085520427
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,1,8,0,0.16185067097345987
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,0,0.1585546632607778
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,1,8,0,0.15377599994341531
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,1,16,0,0.2009920080502828
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,0,0.37060801188151044
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,1,32,0,0.15275200208028158
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,0,0.15591466426849365
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,0,0.1274133324623108
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,1,64,0,0.20147732893625894
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,1,128,0,0.20258132616678873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,0,0.1186346709728241
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,0,0.15768532951672873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,1,1,0,0.1599146624406179
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,0,0.1570026675860087
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,1,2,0,0.19435733556747437
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,0,0.12297067046165466
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,1,4,0,0.13041067123413086
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,0,0.16039466857910156
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,0,0.15119466185569763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,1,8,0,0.20399999618530273
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,1,16,0,0.16637333234151205
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,0,0.12849066654841104
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,1,32,0,0.14525866508483887
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,1,64,0,0.20453333854675293
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,0,0.10776533683141072
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,0,0.15445866187413534
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,1,128,0,0.1913386583328247
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,0,0.14940800269444784
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,0,0.14235732952753702
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,1,1,0,0.20959466695785522
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,0,0.16371200482050577
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,1,2,0,0.20737600326538086
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,1,4,0,0.1927573283513387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,0,0.10060800115267436
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,1,8,0,0.20055466890335083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,0,0.10270399848620097
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,0,0.1030613382657369
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,1,16,0,0.1304159959157308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,1,64,0,0.1292800009250641
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,1,32,0,0.20186134179433188
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,1,128,0,0.19422932465871176
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,0,0.11156266927719116
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,0,0.11199999849001567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,0,0.1575146714846293
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,1,1,0,0.16596800088882446
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,0,0.25017066796620685
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,0,0.10013866424560547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,1,4,0,0.20009599129358926
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,1,2,0,0.20265599091847739
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,1,4,0,0.20840533574422201
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,0,0.1548960010210673
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,1,8,0,0.12658133109410605
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,0,0.09702933828035991
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,0,0.1600266695022583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,1,16,0,0.13621333241462708
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,1,32,0,0.18257067600886026
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,0,0.15237333377202353
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,1,64,0,0.1859253247578939
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,0,0.12371200323104858
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,1,128,0,0.199018657207489
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,0,0.1256053348382314
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,1,1,0,0.20493332544962564
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,0,0.10161599516868591
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,1,2,0,0.12180800239245097
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,0,0.1051626702149709
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,0,0.09707199533780415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,1,4,0,0.15763200322786966
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,1,8,0,0.1760853330294291
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,0,0.11230400204658508
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,1,16,0,0.20440000295639038
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,0,0.09992532928784688
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,1,32,0,0.1565013329188029
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,0,0.16433067123095194
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,1,64,0,0.1921173334121704
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,0,0.12044266859690349
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,1,128,0,0.2020639975865682
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,0,0.16103999813397726
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,1,2,0,0.21545066436131796
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,1,4,0,0.20706133047739664
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,0,0.15904000401496887
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,0,0.15610133608182272
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,1,1,0,0.19957866271336874
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,0,0.12416000167528789
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,1,8,0,0.1253706713517507
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,0,0.12442133824030559
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,0,0.09905067086219788
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,1,16,0,0.15643200278282166
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,1,32,0,0.15520532925923666
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,0,0.15662399927775064
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,16,1,0,0.165994664033254
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,1,64,0,0.20273600021998087
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,0,0.16025599837303162
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,16,2,0,0.156442662080129
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,16,4,0,0.16172800461451212
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,16,1,0,0.20408000548680624
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,1,128,0,0.2020960052808126
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,16,8,0,0.19883199532826742
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,16,2,0,0.2041706641515096
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,16,32,0,0.15561599532763162
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,16,4,0,0.12812800208727518
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,16,8,0,0.15774400035540262
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,16,16,0,0.14138666788736978
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,16,16,0,0.2018453280131022
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,16,64,0,0.14615466197331747
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,16,32,0,0.155541330575943
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,16,64,0,0.18937599658966064
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,16,128,0,0.15639999508857727
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,16,128,0,0.18981333573659262
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,16,1,0,0.14590932925542197
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,16,1,0,0.1977120041847229
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,16,2,0,0.13341866930325827
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,16,2,0,0.13783466815948486
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,16,4,0,0.1556000014146169
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,16,4,0,0.1525866687297821
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,16,8,0,0.13593600193659464
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,16,8,0,0.19022399187088013
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,16,16,0,0.15246933698654175
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,16,16,0,0.15597333510716757
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,16,32,0,0.13897599776585898
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,16,32,0,0.20897066593170166
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,16,64,0,0.10052266716957092
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,16,64,0,0.15676800409952799
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,16,128,0,0.16160000363985697
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,16,128,0,0.2914186716079712
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,16,1,0,0.34920533498128253
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,16,1,0,0.19469332695007324
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,16,4,0,0.20124266544977823
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,16,4,0,0.1588479975859324
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,16,2,0,0.123690664768219
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,16,2,0,0.14847466349601746
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,16,8,0,0.12847466270128885
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,16,8,0,0.2058133284250895
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,16,16,0,0.14946132898330688
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,16,16,0,0.16422933340072632
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,16,32,0,0.1606986622015635
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,16,128,0,0.1570026675860087
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,16,32,0,0.19109867016474405
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,16,64,0,0.15099733074506125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,16,64,0,0.18129066626230875
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,16,128,0,0.12709333499272665
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,16,1,0,0.14970133701960245
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,16,1,0,0.10925333698590596
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,16,4,0,0.2047733267148336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,16,2,0,0.10470933715502422
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,16,2,0,0.20648000637690225
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,16,4,0,0.1583466629187266
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,16,16,0,0.16249066591262817
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,16,8,0,0.1240053375562032
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,16,8,0,0.1740586757659912
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,16,16,0,0.12982933719952902
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,16,32,0,0.1609333356221517
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,16,32,0,0.19617599248886108
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,16,64,0,0.12679466605186462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,16,64,0,0.19272534052530924
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,16,128,0,0.14085333546002707
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,16,128,0,0.16157333056131998
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,16,1,0,0.26764800151189166
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,16,2,0,0.11424000064531963
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,16,2,0,0.18331199884414673
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,16,1,0,0.20152533054351807
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,16,4,0,0.09724266330401103
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,16,8,0,0.16331733266512552
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,16,4,0,0.20325867335001627
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,16,8,0,0.14230400323867798
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,16,16,0,0.1509866714477539
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,16,16,0,0.14636266231536865
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,16,32,0,0.10841066638628642
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,16,32,0,0.12658666570981345
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,16,64,0,0.1634666621685028
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,16,64,0,0.18996800978978476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,16,128,0,0.14833066860834757
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,16,128,0,0.19702933231989542
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,16,1,0,0.18711467583974203
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,16,1,0,0.3482666810353597
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,16,2,0,0.14918933312098184
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,16,4,0,0.16292267044385275
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,16,2,0,0.21681066354115805
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,16,4,0,0.18057066202163696
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,16,8,0,0.16040000319480896
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,16,8,0,0.20403732856114706
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,16,16,0,0.1590986649195353
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,16,16,0,0.2041226625442505
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,16,64,0,0.21042132377624512
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,16,32,0,0.16156799594561258
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,16,64,0,0.12677866220474243
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,16,32,0,0.19539733727773032
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,16,128,0,0.1516853372255961
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,16,1,0,0.3262453277905782
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,16,128,0,0.1316373348236084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,16,1,0,0.6446719964345297
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,16,2,0,0.18329066038131714
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,16,2,0,0.394378662109375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,16,4,0,0.1609119971593221
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,16,4,0,0.26101332902908325
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,16,8,0,0.21156799793243408
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,16,8,0,0.16050133109092712
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,16,16,0,0.15967466433842978
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,16,16,0,0.20855466524759927
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,16,64,0,0.16107733050982156
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,16,32,0,0.13899733622868857
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,16,32,0,0.15689599514007568
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,16,64,0,0.12707199652989706
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,16,128,0,0.12807466586430868
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,16,128,0,0.15731733043988547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,16,1,0,0.5992799997329712
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,16,2,0,0.32393066088358563
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,16,1,0,1.218079964319865
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,16,2,0,0.7429172992706299
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,16,4,0,0.1872160037358602
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,16,8,0,0.158842662970225
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,16,4,0,0.5041546821594238
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,16,32,0,0.11998400092124939
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,16,16,0,0.31413867076237995
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,16,32,0,0.2937493324279785
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,16,8,0,0.3646506468454997
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,16,16,0,0.16361600160598755
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,16,64,0,0.15028799573580423
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,16,64,0,0.2620159983634949
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,16,128,0,0.13134400049845377
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,16,128,0,0.2323466738065084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,16,1,0,1.1536320050557454
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,16,4,0,0.34597333272298175
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,16,2,0,1.439791997273763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,16,2,0,0.6082239945729574
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,16,4,0,0.9669493039449056
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,16,8,0,0.7118240197499593
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,16,8,0,0.1925119956334432
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,16,1,0,2.3910293579101562
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,16,16,0,0.1307146648565928
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,16,16,0,0.5828160047531128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,16,32,0,0.12476266423861186
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,16,32,0,0.5431840022404989
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,32,1,0,0.16242667039235434
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,16,64,0,0.47086934248606366
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,16,128,0,0.4419519901275635
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,32,1,0,0.2059733271598816
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,32,2,0,0.15949333707491556
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,16,128,0,0.13064533472061157
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,32,4,0,0.1558080017566681
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,16,64,0,0.16124799847602844
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,32,4,0,0.15732799967130026
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,32,2,0,0.16341867049535116
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,32,16,0,0.1583199997742971
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,32,8,0,0.16221867005030313
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,32,8,0,0.19166932503382364
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,32,16,0,0.2158880035082499
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,32,128,0,0.13318399588267008
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,32,32,0,0.15658133228619894
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,32,32,0,0.20114133755366007
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,32,64,0,0.15689067045847574
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,32,64,0,0.18650132417678833
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,32,2,0,0.13063466548919678
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,32,128,0,0.17171200116475424
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,32,2,0,0.19619200627009073
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,32,1,0,0.11195733149846394
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,32,1,0,0.1446293294429779
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,32,4,0,0.1427786648273468
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,32,4,0,0.17015999555587769
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,32,8,0,0.14622400204340616
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,32,8,0,0.19738133748372397
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,32,16,0,0.1267253359158834
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,32,32,0,0.20216000080108643
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,32,64,0,0.1455519994099935
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,32,16,0,0.16289066274960837
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,32,32,0,0.13958932956059775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,32,64,0,0.16010666886965433
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,32,128,0,0.12710932890574136
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,32,128,0,0.18931732575098673
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,32,1,0,0.20357332626978555
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,32,1,0,0.16711467504501343
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,32,4,0,0.1628266672293345
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,32,2,0,0.2132533391316732
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,32,8,0,0.17356266578038534
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,32,8,0,0.11329600214958191
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,32,2,0,0.16286933422088623
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,32,4,0,0.20977065960566202
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,32,16,0,0.14682666460673013
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,32,16,0,0.1600160002708435
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,32,32,0,0.1258080005645752
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,32,32,0,0.14723199605941772
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,32,64,0,0.14882666865984598
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,32,64,0,0.15401599804560342
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,32,128,0,0.14728533228238425
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,32,128,0,0.1905919909477234
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,32,1,0,0.13506133357683817
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,32,2,0,0.31304534276326496
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,32,1,0,0.22267733017603555
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,32,2,0,0.1476533313592275
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,32,4,0,0.20064000288645426
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,32,4,0,0.15602133671442667
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,32,8,0,0.14762666821479797
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,32,32,0,0.18452799320220947
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,32,8,0,0.1642453372478485
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,32,16,0,0.10479467113812764
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,32,16,0,0.14787200093269348
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,32,32,0,0.12588266531626383
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,32,64,0,0.15313067038853964
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,32,64,0,0.19734932978947958
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,32,128,0,0.13766933480898538
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,32,128,0,0.17232000827789307
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,32,1,0,0.1881600022315979
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,32,2,0,0.15226133664449057
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,32,8,0,0.11901866396268208
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,32,1,0,0.34090665976206463
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,32,2,0,0.21239999930063883
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,32,4,0,0.12834133704503378
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,32,32,0,0.1336373289426168
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,32,4,0,0.19790399074554443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,32,8,0,0.20281600952148438
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,32,16,0,0.12461866935094197
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,32,128,0,0.12084266543388367
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,32,128,0,0.19136534134546915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,32,16,0,0.1824959913889567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,32,32,0,0.1649440030256907
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,32,64,0,0.1302293340365092
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,32,1,0,0.6314826806386312
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,32,64,0,0.19524800777435303
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,32,1,0,0.32421332597732544
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,32,2,0,0.18799465894699097
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,32,4,0,0.17683200041453043
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,32,16,0,0.16114667057991028
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,32,2,0,0.37881068388621014
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,32,4,0,0.2506986657778422
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,32,32,0,0.15461867054303488
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,32,8,0,0.16099199652671814
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,32,8,0,0.21734400590260824
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,32,16,0,0.199455996354421
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,32,32,0,0.11859200398127238
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,32,64,0,0.15358400344848633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,32,64,0,0.19829867283503214
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,32,128,0,0.12106666962305705
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,32,128,0,0.19861332575480142
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,32,1,0,0.6044533252716064
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,32,2,0,0.3258240024248759
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,32,1,0,1.2008000214894612
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,32,2,0,0.719749371210734
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,32,4,0,0.18445332845052084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,32,4,0,0.4756106535593669
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,32,8,0,0.1368000010649363
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,32,8,0,0.3386293252309163
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,32,16,0,0.14826132853825888
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,32,16,0,0.2889973322550456
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,32,32,0,0.15282133221626282
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,32,32,0,0.2648693323135376
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,32,64,0,0.12484266360600789
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,32,64,0,0.23838400840759277
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,32,128,0,0.12499733765920003
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,32,128,0,0.21556800603866577
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,32,1,0,1.1529066562652588
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,32,2,0,0.6020906766255697
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,32,1,0,2.329871972401937
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,32,2,0,1.3915146191914876
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,32,4,0,0.32624000310897827
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,32,8,0,0.19165333112080893
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,32,4,0,0.9111253420511881
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,32,8,0,0.6631360054016113
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,32,16,0,0.1514026621977488
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,32,16,0,0.5510453383127848
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,32,32,0,0.14292266964912415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,32,32,0,0.47489599386850995
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,32,128,0,0.15964266657829285
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,32,64,0,0.15614400307337442
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,32,64,0,0.4549119869867961
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,32,128,0,0.4078559875488281
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,32,1,0,2.2317760785420737
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,32,2,0,1.1610506375630696
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,32,4,0,0.626911997795105
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,32,8,0,0.3398933410644531
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,32,1,0,4.610410690307617
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,32,4,0,1.7883520126342773
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,32,2,0,2.721909205118815
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,32,16,0,0.20099733273188272
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,32,8,0,1.2911946773529053
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,32,16,0,1.0662079652150471
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,32,32,0,0.13217600186665854
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,64,1,0,0.12686933080355325
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,32,32,0,0.9447626272837321
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,64,2,0,0.1511786679426829
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,64,1,0,0.1955733299255371
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,32,64,0,0.16197333733240762
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,32,128,0,0.7623519897460938
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,32,64,0,0.8535359700520834
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,32,128,0,0.16293332974116007
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,64,2,0,0.20840533574422201
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,64,4,0,0.15930133064587912
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,64,8,0,0.1579093337059021
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,64,16,0,0.14846932888031006
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,64,8,0,0.21034133434295654
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,64,4,0,0.1864746610323588
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,64,16,0,0.20635199546813965
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,64,128,0,0.14895466963450113
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,64,32,0,0.12299199899037679
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,64,32,0,0.20694400866826376
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,64,64,0,0.149807999531428
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,64,64,0,0.18197333812713623
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,64,128,0,0.20013866821924844
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,64,1,0,0.1444533367951711
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,64,2,0,0.16781866550445557
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,64,1,0,0.15710399548212686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,64,2,0,0.15542399883270264
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,64,4,0,0.14312533537546793
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,64,4,0,0.1950613260269165
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,64,8,0,0.13663466771443686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,64,8,0,0.16104533274968466
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,64,16,0,0.1550826629002889
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,64,16,0,0.19180800517400107
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,64,128,0,0.14491732915242514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,64,32,0,0.12083199620246887
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,64,32,0,0.15190933148066202
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,64,64,0,0.13477333386739096
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,64,1,0,0.19951999187469482
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,64,64,0,0.149509330590566
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,64,128,0,0.15635200341542563
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,64,1,0,0.12796266873677573
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,64,2,0,0.154831995566686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,64,2,0,0.2023786703745524
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,64,4,0,0.12325333555539449
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,64,16,0,0.1439893345038096
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,64,4,0,0.1914773384730021
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,64,8,0,0.1513813336690267
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,64,8,0,0.1532426675160726
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,64,16,0,0.15100266536076865
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,64,32,0,0.15025599797566733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,64,32,0,0.1498240033785502
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,64,64,0,0.14840533336003622
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,64,64,0,0.15876266360282898
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,64,128,0,0.14090133706728616
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,64,128,0,0.2003893256187439
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,64,1,0,0.1867039998372396
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,64,2,0,0.13249066472053528
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,64,1,0,0.3423893451690674
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,64,2,0,0.22187199195226034
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,64,4,0,0.15992533167203268
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,64,8,0,0.15779733657836914
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,64,4,0,0.2081600030263265
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,64,32,0,0.14418133099873862
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,64,8,0,0.20151466131210327
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,64,16,0,0.10991999506950378
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,64,16,0,0.17780800660451254
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,64,128,0,0.14914133151372275
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,64,32,0,0.1393013298511505
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,64,64,0,0.1551146705945333
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,64,64,0,0.16207999984423319
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,64,128,0,0.14499200383822122
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,64,1,0,0.3286079963048299
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,64,1,0,0.6327999830245972
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,64,4,0,0.24046399195988974
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,64,4,0,0.16708266735076904
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,64,2,0,0.18027732769648233
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,64,2,0,0.38239999612172443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,64,8,0,0.164192001024882
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,64,8,0,0.16942399740219116
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,64,16,0,0.159360001484553
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,64,16,0,0.20588266849517822
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,64,32,0,0.15614400307337442
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,64,32,0,0.1519253353277842
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,64,64,0,0.15848533312479654
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,64,64,0,0.21806933482487997
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,64,128,0,0.15238933761914572
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,64,128,0,0.1684373418490092
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,64,1,0,0.5980480114618937
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,64,2,0,0.3254026571909587
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,64,1,0,1.213098684946696
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,64,2,0,0.7369866371154785
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,64,4,0,0.18500800927480063
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,64,4,0,0.45996801058451336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,64,16,0,0.16296533743540445
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,64,8,0,0.16292267044385275
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,64,8,0,0.3211839993794759
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,64,16,0,0.2592693368593852
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,64,32,0,0.1585706671079
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,64,32,0,0.2274613380432129
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,64,64,0,0.1479146679242452
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,64,64,0,0.22180799643198648
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,64,128,0,0.12396267056465149
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,64,128,0,0.19856532414754233
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,64,2,0,0.6078986724217733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,64,1,0,1.1453920205434163
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,64,4,0,0.32734400033950806
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,64,1,0,2.3611146608988443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,64,2,0,1.3793813387552898
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,64,8,0,0.1886133352915446
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,64,4,0,0.868885358174642
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,64,8,0,0.6134080092112223
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,64,16,0,0.2935360074043274
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,64,32,0,0.16160000363985697
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,64,16,0,0.48047467072804767
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,64,32,0,0.42744000752766925
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,64,64,0,0.39390401045481366
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,64,64,0,0.13109866778055826
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,64,128,0,0.10751466949780782
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,64,128,0,0.3645013173421224
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,64,4,0,0.6136586666107178
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,64,1,0,2.2411626180013022
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,64,2,0,1.1564053694407146
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,64,8,0,0.3347359895706177
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,64,2,0,2.6995038986206055
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,64,4,0,1.6807039578755696
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,64,8,0,1.1813066800435383
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,64,16,0,0.20495466391245523
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,64,1,0,4.6515092849731445
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,64,16,0,0.9363679885864258
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,64,32,0,0.16906134287516275
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,64,32,0,0.8025173346201578
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,64,128,0,0.16291200121243796
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,64,64,0,0.1257866621017456
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,64,64,0,0.7484959761301676
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,64,128,0,0.7283199628194174
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,64,2,0,2.250943978627523
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,64,4,0,1.1847519874572754
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,64,8,0,0.639631986618042
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,64,1,0,4.398928006490071
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,64,4,0,3.3233973185221353
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,64,8,0,2.319706598917643
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,64,2,0,5.342149098714192
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,64,16,0,0.36561067899068195
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,64,1,0,9.212890625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,64,32,0,0.22890132665634155
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,64,16,0,1.8299306233723958
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,64,64,0,0.15660267074902853
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,64,32,0,1.5650293032328289
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,128,1,0,0.0997759997844696
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,64,64,0,1.4585013389587402
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,64,128,0,0.1548960010210673
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,128,2,0,0.16180800398190817
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,128,4,0,0.16131732861200967
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,128,8,0,0.15777599811553955
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,64,128,0,1.3984373410542805
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,128,16,0,0.1288426617781321
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,128,32,0,0.14916266997655234
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,128,64,0,0.12582932909329733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,128,128,0,0.12442666292190552
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,128,1,0,0.11847999691963196
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,128,2,0,0.1300266683101654
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,128,4,0,0.10011733571688335
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,128,16,0,0.09636267026265462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,128,8,0,0.1092800001303355
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,128,64,0,0.09915199875831604
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,128,32,0,0.10825066765149434
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,128,128,0,0.10480533043543498
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,1,1,0,0.1304480036099752
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,0,0.1283626655737559
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,0,0.10587732990582784
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,1,16,0,0.14299199978510538
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,1,8,0,0.12576533357302347
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,0,0.13949867089589438
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,1,4,0,0.13244266311327615
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,0,0.11002666751543681
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,1,32,0,0.13334932923316956
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,0,0.1253439982732137
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,0,0.09835199515024821
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,0,0.10436266660690308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,1,64,0,0.20529067516326904
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,1,2,0,0.1254026691118876
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,0,0.10002133250236511
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,0,0.10478400190671285
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,1,128,0,0.13565867145856222
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,0,0.09741866588592529
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,1,1,0,0.2148373325665792
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,1,2,0,0.1330826679865519
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,0,0.09760000308354695
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,1,4,0,0.12199466427167256
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,0,0.1164533297220866
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,1,8,0,0.15288533767064413
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,0,0.1632960041364034
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,1,16,0,0.2068106730779012
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,0,0.10441066821416219
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,1,32,0,0.20197333892186484
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,0,0.1232319970925649
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,1,64,0,0.1183733344078064
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,0,0.09486933549245198
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,1,128,0,0.16525866587956747
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,0,0.10299733281135559
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,1,1,0,0.13144532839457193
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,0,0.12821867068608603
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,1,2,0,0.18141865730285645
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,0,0.12311466534932454
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,1,4,0,0.19708800315856934
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,0,0.1153600017229716
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,1,8,0,0.15921066204706827
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,0,0.16054399808247885
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,1,16,0,0.20594666401545206
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,0,0.12737600008646646
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,1,32,0,0.1534346640110016
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,1,1,0,0.13218133648236594
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,0,0.12550399700800577
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,1,64,0,0.19760000705718994
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,0,0.11738133430480957
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,1,128,0,0.15997866789499918
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,0,0.10102933645248413
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,0,0.12870400150616965
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,1,2,0,0.18565332889556885
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,0,0.10300266742706299
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,1,4,0,0.1602186659971873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,0,0.0958079993724823
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,1,8,0,0.12441600362459819
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,0,0.12688000003496805
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,1,16,0,0.20690667629241943
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,0,0.13063466548919678
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,0,0.10000532865524292
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,1,32,0,0.21314666668574014
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,0,0.13512000441551208
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,1,64,0,0.15917866428693137
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,0,0.15423466761906943
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,1,128,0,0.1560533344745636
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,1,1,0,0.13369066516558328
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,0,0.10090667009353638
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,1,2,0,0.1325493355592092
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,0,0.0974666674931844
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,0,0.10469333330790202
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,1,32,0,0.12571733196576437
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,1,4,0,0.13171733419100443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,0,0.09430399537086487
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,1,8,0,0.11916800340016682
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,0,0.09556266665458679
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,1,16,0,0.13247999548912048
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,0,0.1025279959042867
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,1,64,0,0.16177067160606384
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,1,2,0,0.126309335231781
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,0,0.09761599699656169
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,0,0.124208003282547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,1,128,0,0.13289067149162292
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,1,8,0,0.18317333857218424
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,0,0.09744000434875488
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,1,1,0,0.12455466389656067
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,0,0.10047466556231181
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,1,4,0,0.12486933668454488
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,0,0.09820266564687093
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,0,0.09939199686050415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,1,16,0,0.13081600268681845
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,0,0.09929600358009338
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,1,32,0,0.1240000029404958
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,0,0.10240000486373901
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,1,64,0,0.12613333264986673
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,0,0.09556800127029419
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,1,128,0,0.13544533650080362
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,0,0.1002400020758311
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,1,1,0,0.12728533148765564
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,0,0.09796266754468282
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,1,2,0,0.12635200222333273
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,0,0.09271466732025146
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,1,4,0,0.12803199887275696
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,0,0.09481599926948547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,1,8,0,0.12898666659990946
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,0,0.11708266536394756
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,0,0.10598400235176086
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,1,16,0,0.1283253331979116
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,0,0.1511413355668386
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,1,32,0,0.19258666038513184
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,1,64,0,0.160671999057134
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,0,0.12507733702659607
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,1,128,0,0.20640534162521362
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,0,0.10199466347694397
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,1,1,0,0.13026133179664612
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,1,8,0,0.13020267089207968
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,0,0.10022399822870891
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,0,0.09915733337402344
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,1,2,0,0.12431466579437256
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,0,0.09478400150934856
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,1,4,0,0.13333866993586221
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,0,0.0950933297475179
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,1,16,0,0.12387200196584065
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,0,0.10655466715494792
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,1,32,0,0.19497066736221313
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,0,0.11876799662907918
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,1,64,0,0.18184000253677368
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,0,0.1534986694653829
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,1,128,0,0.15476800004641214
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,0,0.1596160034338633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,1,1,0,0.19699199994405112
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,0,0.10371200243631999
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,1,2,0,0.25358400742212933
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,0,0.09662399689356486
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,1,4,0,0.12949867049853006
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,0,0.15244266390800476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,1,8,0,0.16106133659680685
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,0,0.13153066237767538
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,1,16,0,0.1269973317782084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,0,0.15692266821861267
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,1,32,0,0.20086399714152017
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,0,0.10114666819572449
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,1,64,0,0.133925328652064
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,16,2,0,0.1476533313592275
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,0,0.15180800358454385
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,16,1,0,0.09613866607348125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,1,128,0,0.1581706702709198
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,16,1,0,0.12358400225639343
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,16,2,0,0.09664533535639445
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,16,4,0,0.09936533371607463
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,16,4,0,0.19144533077875772
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,16,8,0,0.13793067137400308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,16,8,0,0.19426133235295615
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,16,16,0,0.12959999839464822
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,16,128,0,0.12339199582735698
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,16,16,0,0.18550399939219156
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,16,32,0,0.1506613294283549
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,16,32,0,0.1986400087674459
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,16,64,0,0.14897599816322327
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,16,64,0,0.19615999857584634
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,16,128,0,0.1937546730041504
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,16,1,0,0.09431466460227966
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,16,1,0,0.1283253331979116
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,16,2,0,0.09853866696357727
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,16,2,0,0.12813867131868997
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,16,4,0,0.10015466809272766
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,16,4,0,0.1308693289756775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,16,8,0,0.1514346698919932
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,16,8,0,0.1232266624768575
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,16,16,0,0.12301333745320638
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,16,16,0,0.1623306671778361
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,16,128,0,0.19378666083017984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,16,32,0,0.1265120009581248
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,16,32,0,0.1970026691754659
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,16,64,0,0.16214399536450705
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,16,64,0,0.18900267283121744
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,16,128,0,0.13769599795341492
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,16,1,0,0.0972053309281667
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,16,1,0,0.12567466497421265
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,16,8,0,0.137472003698349
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,16,2,0,0.09623466928799947
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,16,2,0,0.1606613298257192
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,16,32,0,0.15033066272735596
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,16,32,0,0.15998933712641397
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,16,4,0,0.10026666522026062
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,16,4,0,0.14083199699719748
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,16,8,0,0.14172800381978354
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,16,16,0,0.09451733032862346
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,16,16,0,0.16647467017173767
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,16,64,0,0.12520000338554382
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,16,64,0,0.19308799505233765
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,16,128,0,0.11114666859308879
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,16,128,0,0.19748266537984213
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,16,1,0,0.10085333387056987
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,16,8,0,0.11152000228563945
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,16,1,0,0.12993599971135458
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,16,2,0,0.09641066193580627
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,16,2,0,0.2030986746152242
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,16,4,0,0.12479466199874878
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,16,4,0,0.1606613298257192
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,16,8,0,0.2048799991607666
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,16,16,0,0.1583626667658488
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,16,16,0,0.13571733236312866
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,16,32,0,0.14526399970054626
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,16,32,0,0.15960533420244852
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,16,64,0,0.15782399972279867
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,16,64,0,0.16204266746838888
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,16,128,0,0.15528000394503275
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,16,128,0,0.20307199160257974
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,16,4,0,0.11142399907112122
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,16,4,0,0.13136000434557596
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,16,1,0,0.11552533507347107
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,16,1,0,0.1954186757405599
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,16,2,0,0.12830400466918945
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,16,2,0,0.19938133160273233
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,16,32,0,0.19085333744684854
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,16,8,0,0.1455893317858378
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,16,8,0,0.12845333417256674
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,16,16,0,0.10477333267529805
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,16,128,0,0.19057067235310873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,16,16,0,0.20315200090408325
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,16,32,0,0.11618666847546895
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,16,64,0,0.1239946683247884
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,16,64,0,0.20114133755366007
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,16,128,0,0.1467466652393341
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,16,1,0,0.1853920022646586
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,16,1,0,0.34401599566141766
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,16,8,0,0.19575466712315878
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,16,2,0,0.11241599917411804
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,16,2,0,0.2099413275718689
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,16,4,0,0.1418826679388682
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,16,4,0,0.16263999541600546
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,16,8,0,0.11689066886901855
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,16,16,0,0.1591146687666575
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,16,128,0,0.12429333726565044
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,16,16,0,0.2041226625442505
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,16,32,0,0.10082667072614034
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,16,32,0,0.12844266494115195
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,16,64,0,0.16386666893959045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,16,64,0,0.14917866388956705
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,16,128,0,0.2025173306465149
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,16,1,0,0.3246293266614278
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,16,1,0,0.6382346550623575
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,16,2,0,0.1904266675313314
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,16,16,0,0.13035200039545694
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,16,2,0,0.38728535175323486
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,16,4,0,0.15552533666292825
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,16,4,0,0.26633065938949585
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,16,8,0,0.11397866408030193
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,16,8,0,0.21716799338658652
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,16,16,0,0.1800160010655721
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,16,32,0,0.15652799606323242
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,16,32,0,0.17259732882181802
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,16,64,0,0.14672533671061197
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,16,64,0,0.19790399074554443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,16,128,0,0.1544266641139984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,16,128,0,0.15119999647140503
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,16,2,0,0.7467146714528402
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,16,1,0,0.6002933184305826
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,16,8,0,0.361194650332133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,16,1,0,1.2158346970876057
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,16,2,0,0.32819199562072754
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,16,4,0,0.18718934059143066
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,16,4,0,0.5002613464991251
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,16,8,0,0.13612799843152365
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,16,16,0,0.1572213371594747
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,16,16,0,0.31409599383672077
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,16,32,0,0.1309760014216105
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,16,32,0,0.2950933376948039
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,16,64,0,0.1249120036760966
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,16,64,0,0.25310399134953815
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,16,128,0,0.14613333344459534
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,16,128,0,0.23963199059168497
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,16,1,0,1.148154656092326
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,16,1,0,2.3694027264912925
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,16,2,0,0.6064000129699707
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,16,2,0,1.4344852765401204
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,16,4,0,0.3337973356246948
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,16,4,0,0.9631573359171549
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,16,8,0,0.19497066736221313
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,16,8,0,0.7183307011922201
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,16,16,0,0.13774933417638144
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,16,16,0,0.5833973487218221
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,16,128,0,0.14587199687957764
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,16,32,0,0.12095999717712402
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,16,32,0,0.5450186729431152
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,16,64,0,0.1572480003039042
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,16,64,0,0.47280001640319824
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,16,128,0,0.44045865535736084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,32,1,0,0.10012267033259074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,32,1,0,0.1279146671295166
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,32,2,0,0.1016319990158081
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,32,2,0,0.12985066572825113
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,32,4,0,0.12478933731714885
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,32,32,0,0.12097600102424622
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,32,4,0,0.12273066242535909
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,32,64,0,0.12452266613642375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,32,8,0,0.1425226628780365
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,32,8,0,0.1286133329073588
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,32,16,0,0.1497119963169098
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,32,16,0,0.2002506653467814
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,32,32,0,0.1460906664530436
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,32,64,0,0.18896534045537314
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,32,128,0,0.15707199772198996
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,32,128,0,0.15270933508872986
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,32,1,0,0.09852799773216248
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,32,1,0,0.12769599755605063
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,32,2,0,0.09970666964848836
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,32,2,0,0.1416213313738505
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,32,4,0,0.10222400228182475
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,32,4,0,0.13890666762987772
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,32,8,0,0.12152533729871114
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,32,8,0,0.15102400382359824
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,32,16,0,0.15969600280125937
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,32,16,0,0.155349334081014
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,32,32,0,0.15494400262832642
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,32,32,0,0.14571733276049295
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,32,64,0,0.14788267016410828
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,32,64,0,0.2036906679471334
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,32,2,0,0.1304853359858195
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,32,128,0,0.12443199753761292
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,32,128,0,0.19578667481740317
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,32,1,0,0.10109866658846538
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,32,8,0,0.15768000483512878
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,32,1,0,0.12748266259829202
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,32,2,0,0.0997759997844696
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,32,4,0,0.14724799990653992
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,32,4,0,0.20402665932973227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,32,8,0,0.10874133308728536
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,32,16,0,0.14946666359901428
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,32,16,0,0.16273599863052368
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,32,32,0,0.1541920006275177
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,32,32,0,0.1590986649195353
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,32,64,0,0.15385066469510397
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,32,64,0,0.19812800486882529
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,32,128,0,0.1584053337574005
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,32,4,0,0.1558026671409607
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,32,128,0,0.14851733048756918
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,32,1,0,0.11035199960072835
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,32,1,0,0.1929439902305603
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,32,2,0,0.1586079994837443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,32,2,0,0.15621866782506308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,32,4,0,0.19777599970499674
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,32,8,0,0.15838399529457092
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,32,8,0,0.20065067211786905
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,32,16,0,0.11545600493748982
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,32,16,0,0.15665066242218018
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,32,32,0,0.1302720010280609
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,32,32,0,0.20298665761947632
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,32,64,0,0.12755200266838074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,32,64,0,0.19442667563756308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,32,128,0,0.15995200475056967
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,32,128,0,0.19362666209538779
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,32,1,0,0.18488534291585287
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,32,1,0,0.3388426701227824
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,32,2,0,0.13581867019335428
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,32,2,0,0.21029333273569742
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,32,4,0,0.12598933776219687
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,32,4,0,0.13505066434542337
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,32,8,0,0.15892266233762106
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,32,64,0,0.12622933586438498
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,32,64,0,0.15733866890271506
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,32,8,0,0.19477333625157675
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,32,16,0,0.11249066392580669
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,32,16,0,0.1529813309510549
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,32,32,0,0.10159466663996379
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,32,32,0,0.18292266130447388
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,32,128,0,0.10068800052007039
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,32,128,0,0.15897066394488016
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,32,1,0,0.32340800762176514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,32,1,0,0.6301173369089762
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,32,2,0,0.18193066120147705
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,32,2,0,0.3770666519800822
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,32,4,0,0.1405226687590281
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,32,4,0,0.2504746715227763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,32,8,0,0.1249120036760966
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,32,8,0,0.1852746605873108
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,32,16,0,0.12274666627248128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,32,16,0,0.20521066586176553
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,32,32,0,0.12013866504033406
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,32,32,0,0.16530666748682657
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,32,1,0,1.1986080010732014
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,32,2,0,0.32094399134318036
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,32,64,0,0.16057067116101584
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,32,64,0,0.20503467321395874
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,32,128,0,0.15853333473205566
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,32,128,0,0.15876266360282898
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,32,1,0,0.5989439884821574
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,32,2,0,0.7183146476745605
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,32,16,0,0.29021332661310834
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,32,4,0,0.18904000520706177
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,32,4,0,0.47520001729329425
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,32,8,0,0.13447466492652893
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,32,8,0,0.34299735228220624
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,32,16,0,0.1581493318080902
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,32,32,0,0.12311466534932454
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,32,32,0,0.2591093381245931
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,32,64,0,0.12739200393358865
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,32,64,0,0.24183466037114462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,32,128,0,0.13521066308021545
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,32,128,0,0.21785600980122885
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,32,1,0,1.1488800048828125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,32,1,0,2.329199949900309
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,32,2,0,0.6039146582285563
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,32,2,0,1.3880213101704915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,32,16,0,0.16350932916005453
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,32,4,0,0.32812267541885376
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,32,4,0,0.9142666657765707
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,32,8,0,0.1874879995981852
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,32,8,0,0.6628959973653158
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,32,16,0,0.5311946471532186
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,32,32,0,0.15181333820025125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,32,32,0,0.48260800043741864
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,32,1,0,2.2387359937032065
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,32,64,0,0.1276479959487915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,32,64,0,0.464469313621521
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,32,128,0,0.12897599736849466
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,32,128,0,0.39856000741322833
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,32,1,0,4.592165311177571
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,32,8,0,1.2874346574147542
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,32,2,0,1.1597119967142742
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,32,2,0,2.716047922770182
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,32,4,0,0.6174026727676392
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,32,4,0,1.7786399523417156
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,32,64,0,0.15813866257667542
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,32,8,0,0.3423839807510376
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,32,16,0,0.2027733325958252
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,32,16,0,1.056997299194336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,32,32,0,0.15808533628781637
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,32,32,0,0.9108426570892334
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,32,64,0,0.8846933046976725
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,32,128,0,0.13105066617329916
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,32,128,0,0.771392027537028
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,64,1,0,0.09553066889444987
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,64,1,0,0.12111467123031616
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,64,2,0,0.15119466185569763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,64,2,0,0.19925334056218466
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,64,4,0,0.15171733498573303
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,64,32,0,0.1553973356882731
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,64,4,0,0.1251359979311625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,64,8,0,0.13102933764457703
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,64,8,0,0.1646453340848287
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,64,16,0,0.1188266674677531
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,64,16,0,0.11918933192888896
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,64,32,0,0.19419733683268228
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,64,64,0,0.15315733353296915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,64,64,0,0.19125332434972128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,64,128,0,0.15470932920773825
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,64,128,0,0.19868799050649008
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,64,1,0,0.10428800185521443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,64,1,0,0.12732799847920737
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,64,2,0,0.1507306694984436
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,64,16,0,0.16083733240763345
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,64,2,0,0.19343467553456625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,64,4,0,0.14845866958300272
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,64,4,0,0.19479467471440634
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,64,8,0,0.15320533514022827
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,64,8,0,0.1935946742693583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,64,16,0,0.12397866447766621
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,64,32,0,0.1530933380126953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,64,32,0,0.15003732840220133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,64,64,0,0.15220800042152405
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,64,2,0,0.1500746707121531
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,64,64,0,0.1926400065422058
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,64,128,0,0.157258669535319
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,64,128,0,0.19510932763417563
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,64,1,0,0.11135466893513997
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,64,1,0,0.19460799296696982
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,64,2,0,0.16221333543459573
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,64,16,0,0.20790932575861612
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,64,4,0,0.15465600291887918
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,64,4,0,0.20195200045903525
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,64,8,0,0.15102400382359824
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,64,8,0,0.19767467180887857
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,64,16,0,0.15777066349983215
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,64,32,0,0.2291626731554667
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,64,32,0,0.1574026644229889
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,64,1,0,0.3410400152206421
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,64,64,0,0.15800000230471292
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,64,64,0,0.20060267051060995
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,64,128,0,0.15076266725858053
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,64,4,0,0.20786666870117188
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,64,128,0,0.19157334168752035
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,64,1,0,0.1856266657511393
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,64,2,0,0.1911626656850179
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,64,2,0,0.20211732387542725
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,64,4,0,0.1541813313961029
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,64,8,0,0.1585493286450704
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,64,8,0,0.19572800397872925
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,64,16,0,0.1056106686592102
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,64,16,0,0.19762667020161948
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,64,32,0,0.15408000349998474
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,64,32,0,0.16126400232315063
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,64,64,0,0.12051199873288472
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,64,64,0,0.19828800360361734
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,64,128,0,0.1548533340295156
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,64,128,0,0.1575146714846293
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,64,1,0,0.3250666658083598
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,64,1,0,0.6313813527425131
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,64,2,0,0.1874026656150818
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,64,2,0,0.37547731399536133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,64,4,0,0.13039466738700867
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,64,4,0,0.23896533250808716
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,64,8,0,0.15680000185966492
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,64,8,0,0.1698453426361084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,64,16,0,0.15852800011634827
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,64,16,0,0.19799466927846274
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,64,32,0,0.1527413328488668
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,64,32,0,0.1607253352801005
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,64,1,0,1.2082560062408447
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,64,64,0,0.15577066938082376
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,64,64,0,0.19442667563756308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,64,128,0,0.16131200393040976
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,64,128,0,0.18241065740585327
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,64,1,0,0.6028266747792562
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,64,2,0,0.32682132720947266
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,64,2,0,0.716543992360433
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,64,4,0,0.18172266085942587
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,64,32,0,0.234607994556427
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,64,4,0,0.4542400042215983
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,64,64,0,0.1251520017782847
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,64,8,0,0.13158399860064188
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,64,8,0,0.3200800021489461
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,64,16,0,0.1562399963537852
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,64,16,0,0.2585546573003133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,64,32,0,0.15613866845766702
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,64,64,0,0.20803733666737875
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,64,4,0,0.3276639978090922
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,64,128,0,0.15762133399645487
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,64,128,0,0.1999946633974711
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,64,1,0,1.2818400065104167
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,64,2,0,0.6041386524836222
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,64,1,0,2.3556267420450845
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,64,2,0,1.37500794728597
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,64,4,0,0.8714346885681152
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,64,32,0,0.41888535022735596
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,64,8,0,0.19021866718928018
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,64,8,0,0.6118026574452718
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,64,16,0,0.13617066542307535
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,64,16,0,0.48057599862416583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,64,32,0,0.15665066242218018
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,64,64,0,0.15920533736546835
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,64,64,0,0.39608001708984375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,64,128,0,0.12599999705950418
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,64,128,0,0.3636106650034587
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,64,1,0,2.2358293533325195
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,64,4,0,1.6866933504740398
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,64,16,0,0.2005066672960917
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,64,1,0,4.649696032206218
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,64,2,0,1.1504159768422444
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,64,16,0,0.9296800295511881
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,64,2,0,2.693104108174642
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,64,4,0,0.6114240090052286
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,64,8,0,0.3401706616083781
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,64,128,0,0.15124799807866415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,64,8,0,1.1807466348012288
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,64,32,0,0.1604213317235311
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,64,32,0,0.7991893291473389
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,64,64,0,0.15269333124160767
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,64,64,0,0.7494080066680908
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,64,128,0,0.728058656056722
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,64,1,0,4.397616068522136
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,64,4,0,3.3231681187947593
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,64,2,0,2.2542239824930825
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,64,1,0,9.190714518229166
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,64,4,0,1.1800159613291423
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,64,2,0,5.326277414957683
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,64,8,0,0.6442826588948568
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,64,8,0,2.3204906781514487
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,64,16,0,0.3668160041173299
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,64,16,0,1.8306934038798015
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,64,32,0,0.23034133513768515
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,64,32,0,1.587183952331543
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,64,64,0,0.16644799709320068
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,64,64,0,1.4636853535970051
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,64,128,0,0.1541706621646881
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,128,4,0,0.1223413348197937
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,64,128,0,1.4022080103556316
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,128,1,0,0.0950933297475179
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,128,2,0,0.15311466654141745
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,128,8,0,0.15218666195869446
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,128,16,0,0.1885226567586263
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,128,32,0,0.1508799990018209
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,128,64,0,0.12563199798266092
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,128,128,0,0.12731200456619263
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,128,1,0,0.11682132879892985
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,128,2,0,0.12345600128173828
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,128,4,0,0.1530400017897288
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,128,16,0,0.15012266238530478
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,128,8,0,0.10745066404342651
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,128,64,0,0.09976533055305481
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,128,32,0,0.20386666059494019
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,128,128,0,0.19641067584355673
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,128,1,0,0.23054399092992148
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,128,2,0,0.1165066659450531
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,128,4,0,0.10965333382288615
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,128,16,0,0.09716799855232239
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,128,8,0,0.19641067584355673
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,128,64,0,0.10825600226720174
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,128,128,0,0.1222826639811198
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,128,32,0,0.10268800457318623
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,128,1,0,0.33556799093882245
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,128,2,0,0.18683199087778726
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,128,4,0,0.11312533418337505
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,128,8,0,0.10713066657384236
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,128,16,0,0.11104533076286316
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,128,32,0,0.10645332932472229
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,128,128,0,0.10456533233324687
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,128,64,0,0.11184533437093098
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,128,1,0,0.6211680173873901
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,128,2,0,0.3359839916229248
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,128,4,0,0.18397333224614462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,128,8,0,0.16849599281946817
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,128,16,0,0.10012800494829814
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,128,32,0,0.10162666440010071
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,128,128,0,0.10162666440010071
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,128,64,0,0.10178666313489278
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,128,1,0,1.187274694442749
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,128,2,0,0.6185813347498575
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,128,4,0,0.33154133955637616
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,128,8,0,0.19585599501927695
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,128,16,0,0.11723732948303223
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,128,32,0,0.10340266426404317
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,128,128,0,0.10234133402506511
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,128,64,0,0.11134399970372517
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,128,1,0,2.307520071665446
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,128,2,0,1.1879839897155762
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,128,4,0,0.6245973507563273
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,128,8,0,0.34197866916656494
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,128,16,0,0.20059732596079508
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,128,32,0,0.13023466865221658
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,128,128,0,0.10420266787211101
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,128,64,0,0.10483733812967937
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,128,1,0,4.536447842915853
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,128,2,0,2.324010690053304
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,128,4,0,1.194602648417155
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,128,8,0,0.6460053523381551
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,128,16,0,0.37250133355458576
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,128,32,0,0.23225067059199014
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,128,64,0,0.16742932796478271
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,128,128,0,0.11079466342926025
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,128,2,0,4.595461209615071
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,128,1,0,9.054154713948568
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,128,4,0,2.3444533348083496
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,128,8,0,1.2473920186360676
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,128,16,0,0.6976693471272787
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,128,32,0,0.42452800273895264
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,128,64,0,0.28736533721288043
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,128,128,0,0.20480000972747803
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,256,2,0,0.11714133620262146
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,256,2,0,0.12229333321253459
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,256,4,0,0.12298132975896199
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,256,4,0,0.12618666887283325
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,256,8,0,0.162581334511439
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,256,8,0,0.16785067319869995
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,256,16,0,0.09722666939099629
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,256,1,0,0.1267733375231425
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,256,16,0,0.14474667112032572
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,256,1,0,0.13858133554458618
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,256,32,0,0.11805867155392964
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,256,32,0,0.1418346663316091
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,256,64,0,0.10466133554776509
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,256,64,0,0.11185600360234578
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,256,128,0,0.09447999795277913
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,256,128,0,0.10994133353233337
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,256,1,0,0.13197333614031473
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,256,1,0,0.20362667242685953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,256,2,0,0.11821867028872173
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,256,2,0,0.1828533411026001
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,256,4,0,0.15972266594568887
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,256,4,0,0.1441920002301534
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,256,8,0,0.1220906674861908
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,256,8,0,0.11618133385976155
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,256,16,0,0.10605333248774211
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,256,16,0,0.12997333208719888
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,256,32,0,0.10000532865524292
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,256,32,0,0.14680533607800803
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,256,64,0,0.09644266963005066
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,256,64,0,0.11752000451087952
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,256,1,0,0.3590506712595622
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,256,1,0,0.23308799664179483
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,256,2,0,0.1969333291053772
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,256,2,0,0.15498666961987814
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,256,4,0,0.1251200040181478
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,256,4,0,0.11375466982523601
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,256,8,0,0.13015466928482056
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,256,8,0,0.130949338277181
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,256,16,0,0.10102400183677673
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,256,16,0,0.1530933380126953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,256,32,0,0.11839999755223592
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,256,32,0,0.11424533526102702
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,256,64,0,0.13515200217564902
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,256,64,0,0.1306719978650411
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,256,128,0,0.09697066744168599
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,256,128,0,0.15090133746465048
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,256,1,0,0.6573866605758667
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,256,1,0,0.42984533309936523
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,256,2,0,0.35861865679423016
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,256,2,0,0.23703465859095255
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,256,4,0,0.19968533515930176
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,256,4,0,0.15004799763361612
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,256,8,0,0.1962133248647054
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,256,8,0,0.13554666439692178
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,256,16,0,0.12065066893895467
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,256,16,0,0.1474613348642985
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,256,32,0,0.11645866433779399
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,256,32,0,0.1698346734046936
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,256,64,0,0.13132266203562418
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,256,64,0,0.13202133774757385
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,256,128,0,0.1553600033124288
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,256,128,0,0.31915199756622314
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,256,1,0,1.2570239702860515
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,256,1,0,0.8007307052612305
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,256,2,0,0.6797546545664469
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,256,2,0,0.4284106492996216
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,256,128,0,0.17433067162831625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,256,4,0,0.3570079803466797
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,256,4,0,0.24532800912857056
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,256,8,0,0.20472000042597452
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,256,8,0,0.14179733395576477
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,256,128,0,0.11488533020019531
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,256,32,0,0.12078400452931722
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,256,16,0,0.13529066244761148
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,256,16,0,0.1766186753908793
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,256,64,0,0.09757333000500996
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,256,32,0,0.1776319940884908
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,256,64,0,0.16383999586105347
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,256,128,0,0.15566399693489075
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,256,128,0,0.17285333077112833
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,256,1,0,2.457322597503662
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,256,2,0,0.8035946687062582
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,256,2,0,1.2641332944234211
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,256,4,0,0.6742986838022867
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,256,4,0,0.4375893274943034
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,256,8,0,0.36298131942749023
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,256,8,0,0.254314661026001
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,256,16,0,0.21555733680725098
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,256,16,0,0.17717333634694418
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,256,32,0,0.14062399665514627
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,256,32,0,0.17146132389704385
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,256,64,0,0.1241973340511322
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,256,64,0,0.1869493325551351
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,256,128,0,0.15171733498573303
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,256,128,0,0.14664000272750854
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,256,1,0,4.836720148722331
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,256,1,0,3.0139894485473633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,256,2,0,2.4743199348449707
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,256,2,0,1.5560533205668132
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,256,4,0,1.289189338684082
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,256,4,0,0.8315093517303467
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,256,8,0,0.6860266526540121
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,256,8,0,0.46568532784779865
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,256,16,0,0.3920106490453084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,256,16,0,0.2806933323542277
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,256,64,0,0.17633066574732462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,256,32,0,0.24106132984161377
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,256,32,0,0.18839999039967856
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,256,64,0,0.14531733592351279
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,256,128,0,0.11514133214950562
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,256,128,0,0.14548266927401224
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,256,1,0,9.628976186116537
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,256,1,0,5.973381042480469
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,256,2,0,4.90772279103597
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,256,2,0,3.054938634236654
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,256,1,0,1.54419740041097
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,256,4,0,2.5189332962036133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,256,16,0,0.7347253163655599
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,256,8,0,1.3216853141784668
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,256,32,0,0.44549866517384845
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,256,8,0,0.8813013235727946
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,256,64,0,0.240447998046875
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,256,128,0,0.21006399393081665
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,256,16,0,0.5187520186106364
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,256,32,0,0.33694398403167725
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,256,64,0,0.2977439959843953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,256,128,0,0.18837867180506387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,256,256,2,0,9.75650151570638
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,256,256,1,0,11.887989044189453
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,256,256,2,0,6.044810612996419
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,256,256,1,0,19.249759674072266
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,256,256,4,0,5.003610610961914
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,256,256,8,0,2.60204807917277
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,256,256,4,0,3.161967913309733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,256,256,8,0,1.7164959907531738
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,256,256,16,0,0.987722635269165
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,256,256,32,0,0.8400479952494303
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,256,256,64,0,0.55458664894104
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,256,256,32,0,0.6282506783803304
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,512,1,0,0.2273706595102946
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,256,256,64,0,0.4466240008672078
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,512,2,0,0.13738666971524557
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,512,2,0,0.13447999954223633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,256,256,128,0,0.3885013262430827
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,256,256,128,0,0.3546026547749837
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,512,1,0,0.1432960033416748
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,512,4,0,0.10504532853762309
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,512,4,0,0.16774400075276694
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,512,8,0,0.12142399946848552
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,512,32,0,0.13728533188501993
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,512,8,0,0.12186132868131001
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,512,16,0,0.15981866916020712
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,512,64,0,0.14120533068974814
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,512,16,0,0.16964799165725708
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,512,32,0,0.10614933570226033
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,256,4,0,1.60263458887736
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,512,64,0,0.12663466731707254
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,512,2,0,0.2229493260383606
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,512,128,0,0.09883733590443929
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,512,128,0,0.14409599701563516
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,512,1,0,0.40747201442718506
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,512,1,0,0.24145599206288657
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,512,2,0,0.1387999951839447
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,512,4,0,0.15984533230463663
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,512,4,0,0.12293866276741028
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,512,8,0,0.10042132933934529
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,512,8,0,0.17267733812332153
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,512,16,0,0.11707199613253276
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,512,64,0,0.1569706698258718
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,512,16,0,0.1267680029074351
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,512,32,0,0.10317867000897725
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,512,1,0,0.7582506338755289
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,512,32,0,0.13390933473904928
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,512,64,0,0.10483733812967937
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,512,2,0,0.2436479926109314
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,256,256,16,0,1.4367893536885579
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,512,128,0,0.20195200045903525
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,512,128,0,0.12891733646392822
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,512,1,0,0.4436853329340617
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,512,2,0,0.39822932084401447
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,512,4,0,0.21928532918294272
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,512,4,0,0.1418293317159017
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,512,8,0,0.13475199540456137
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,512,8,0,0.1206666628519694
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,512,16,0,0.1300373375415802
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,512,16,0,0.12989866733551025
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,512,128,0,0.09898666540781657
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,512,32,0,0.11409067114194234
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,512,32,0,0.14239999651908875
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,512,64,0,0.09959466258684795
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,512,64,0,0.14004266262054443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,512,128,0,0.14341866970062256
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,512,1,0,1.4438719749450684
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,512,1,0,0.8249813715616862
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,512,2,0,0.7449759642283121
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,512,2,0,0.4457013209660848
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,512,4,0,0.24674133459726968
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,512,4,0,0.4005333185195923
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,512,8,0,0.22524799903233847
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,512,8,0,0.15759999553362528
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,512,16,0,0.15330132842063904
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,512,16,0,0.16176000237464905
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,512,32,0,0.10273599624633789
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,512,32,0,0.12739200393358865
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,512,1,0,1.5914239883422852
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,512,64,0,0.11150399843851726
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,512,2,0,1.4345439275105794
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,512,64,0,0.16170666615168253
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,512,128,0,0.11496532956759135
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,512,128,0,0.14432000120480856
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,512,1,0,2.8202187220255532
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,512,2,0,0.8346719741821289
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,512,4,0,0.7507359981536865
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,512,32,0,0.16522133350372314
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,512,32,0,0.13738133509953818
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,512,64,0,0.14361066619555155
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,512,4,0,0.44954665501912433
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,512,128,0,0.153738667567571
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,512,8,0,0.40880000591278076
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,512,8,0,0.2620053291320801
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,512,16,0,0.2588319977124532
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,512,2,0,2.8166186014811196
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,512,16,0,0.1732800006866455
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,512,64,0,0.15998400251070657
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,512,128,0,0.1507200002670288
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,512,8,0,0.7749280134836832
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,512,1,0,3.127370516459147
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,512,1,0,5.561450958251953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,512,2,0,1.6084480285644531
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,512,4,0,1.453152020772298
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,512,4,0,0.8520320256551107
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,512,8,0,0.4781493345896403
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,512,64,0,0.14199466506640115
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,512,16,0,0.48209067185719806
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,512,16,0,0.2906293272972107
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,512,32,0,0.2893386681874593
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,512,32,0,0.19034133354822794
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,512,64,0,0.19113600254058838
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,512,128,0,0.15085867047309875
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,512,128,0,0.14356799920399985
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,512,4,0,2.8485279083251953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,512,1,0,11.052186330159506
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,512,1,0,6.173072179158528
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,512,2,0,5.544901529947917
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,512,2,0,3.1696640650431314
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,512,4,0,1.6506986618041992
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,512,8,0,1.509717305501302
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,512,8,0,0.9087573687235514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,512,16,0,0.9159093697865804
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,512,16,0,0.5301653146743774
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,512,128,0,0.19921066363652548
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,512,32,0,0.5357760190963745
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,512,32,0,0.3429653247197469
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,512,64,0,0.34539735317230225
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,512,64,0,0.24795732895533243
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,512,128,0,0.25012799104054767
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,128,512,2,0,11.122234344482422
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,128,512,1,0,12.292954762776693
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,128,512,2,0,6.227461496988933
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,128,512,4,0,5.642730712890625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,128,512,4,0,3.266074816385905
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,128,512,1,0,21.910181681315105
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,128,512,8,0,2.9581867853800454
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,128,512,8,0,1.768191973368327
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,128,512,16,0,1.7876799901326497
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,128,512,16,0,1.0050133069356282
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,128,512,32,0,1.025055964787801
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,128,512,32,0,0.6346293290456136
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,1024,1,0,0.26393065849939984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,128,512,64,0,0.6506880124409994
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,128,512,64,0,0.4532800118128459
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,128,512,128,0,0.4607946475346883
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,128,512,128,0,0.36352535088857013
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,1024,1,0,0.26343466838200885
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,1024,2,0,0.15711999932924905
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,1024,16,0,0.1308693289756775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,1024,2,0,0.15687466661135355
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,1024,4,0,0.124399999777476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,1024,4,0,0.17189866304397583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,1024,8,0,0.17297067244847616
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,1024,8,0,0.13248533010482788
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,1024,16,0,0.1774186690648397
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,1024,32,0,0.14421332875887552
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,1024,32,0,0.16294399897257486
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,1024,64,0,0.14816000064214072
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,1024,64,0,0.1358506679534912
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,1024,128,0,0.1618133286635081
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,1024,128,0,0.14613333344459534
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,1024,4,0,0.16107199589411417
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,1024,1,0,0.483295996983846
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,1024,1,0,0.48509331544240314
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,1024,2,0,0.2688480019569397
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,1024,2,0,0.2693600058555603
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,1024,4,0,0.16127467155456543
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,1024,8,0,0.16795200109481812
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,1024,8,0,0.1697280009587606
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,1024,64,0,0.13910399874051413
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,1024,16,0,0.17699732383092245
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,1024,128,0,0.16745599110921225
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,1024,1,0,0.909711996714274
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,1024,16,0,0.21567465861638388
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,1024,32,0,0.16620266437530518
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,1024,32,0,0.18134933710098267
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,1024,64,0,0.14707733194033304
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,1024,4,0,0.27516265710194904
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,1024,128,0,0.17144532998402914
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,1024,1,0,0.9128159681955973
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,1024,2,0,0.4864799976348877
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,1024,2,0,0.4930826822916667
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,1024,4,0,0.2757226626078288
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,1024,8,0,0.17965332667032877
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,1024,64,0,0.1688693364461263
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,1024,8,0,0.169429341952006
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,1024,16,0,0.1748853325843811
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,1024,16,0,0.17754666010538736
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,1024,32,0,0.14875200390815735
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,1024,32,0,0.14551466703414917
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,1024,1,0,1.778293291727702
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,1024,64,0,0.18515199422836304
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,1024,128,0,0.16872000694274902
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,1024,128,0,0.18330667416254678
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,1024,2,0,0.917626698811849
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,1024,1,0,1.7630772590637207
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,1024,2,0,0.9223146438598633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,1024,16,0,0.1930346687634786
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,1024,4,0,0.4965333143870036
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,1024,4,0,0.4989440043767293
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,1024,8,0,0.28571200370788574
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,1024,8,0,0.2874559958775838
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,1024,16,0,0.20539732774098715
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,1024,32,0,0.13988266388575235
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,1024,32,0,0.1656000018119812
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,1024,64,0,0.13955733180046082
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,1024,64,0,0.17783466974894205
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,1024,128,0,0.14965333541234335
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,1024,128,0,0.14328533411026
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,1024,4,0,0.9420533180236816
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,1024,1,0,3.447018623352051
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,1024,2,0,1.767247994740804
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,1024,1,0,3.452687899271647
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,1024,2,0,1.782591978708903
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,1024,4,0,0.9442026615142822
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,1024,8,0,0.5195146799087524
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,1024,8,0,0.5231519937515259
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,1024,16,0,0.33825067679087323
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,1024,16,0,0.3408000071843465
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,1024,128,0,0.14808000127474466
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,1024,32,0,0.2198293407758077
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,1024,32,0,0.22561599810918173
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,1024,64,0,0.16159466902414957
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,1024,2,0,3.494154612223307
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,1024,64,0,0.17734932899475098
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,1024,128,0,0.12583999832471213
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,1024,1,0,6.852352142333984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,1024,2,0,3.476325352986654
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,1024,1,0,6.840917587280273
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,1024,4,0,1.8201813697814941
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,1024,8,0,0.9926613171895345
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,1024,4,0,1.82152525583903
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,1024,8,0,0.9940266609191895
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,1024,64,0,0.2794133424758911
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,1024,16,0,0.624181350072225
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,1024,16,0,0.6243893305460612
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,1024,32,0,0.39028799533843994
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,1024,32,0,0.39236799875895184
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,1024,64,0,0.2770613431930542
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,1024,128,0,0.2135253349939982
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,1024,128,0,0.2137653430302938
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,64,1024,2,0,6.91432507832845
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,64,1024,1,0,13.560192108154297
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,64,1024,4,0,3.615466753641764
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,64,1024,2,0,6.894442876180013
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,64,1024,1,0,13.586421966552734
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,64,1024,4,0,3.6045118967692056
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,64,1024,8,0,1.9348692893981934
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,64,1024,8,0,1.9400053024291992
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,64,1024,16,0,1.1930346488952637
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,64,1024,16,0,1.1972906589508057
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,64,1024,32,0,0.7382506529490153
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,64,1024,32,0,0.7310400009155273
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,64,1024,64,0,0.5049973328908285
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,64,1024,64,0,0.5067733526229858
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,64,1024,128,0,0.3926080067952474
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,64,1024,128,0,0.3913653294245402
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,1536,1,0,0.4089226722717285
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,1536,1,0,0.4078720013300578
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,1536,2,0,0.23273066679636636
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,1536,2,0,0.2304746707280477
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,1536,4,0,0.18016533056894937
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,1536,4,0,0.1744746764500936
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,1536,8,0,0.134442667166392
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,1536,8,0,0.14896532893180847
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,1536,16,0,0.13592533270517984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,1536,32,0,0.14074132839838663
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,1536,16,0,0.16429866353670755
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,1536,32,0,0.13953066865603128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,1536,64,0,0.17381866772969565
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,1536,64,0,0.14326399564743042
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,1536,128,0,0.16570132970809937
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,1536,128,0,0.1763040026028951
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,1536,1,0,0.7823092937469482
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,1536,1,0,0.7589226563771566
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,1536,8,0,0.1782240072886149
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,1536,2,0,0.41338666280110675
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,1536,2,0,0.41387732823689777
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,1536,4,0,0.24109333753585815
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,1536,4,0,0.24042133490244547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,1536,8,0,0.16565866271654764
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,1536,64,0,0.1679146687189738
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,1536,16,0,0.13709333539009094
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,1536,128,0,0.1739893356959025
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,1536,16,0,0.1551199952761332
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,1536,32,0,0.17060800393422446
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,1536,2,0,0.7716373602549235
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,1536,32,0,0.18286933501561484
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,1536,64,0,0.12121599912643433
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,1536,128,0,0.13395733634630838
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,1536,1,0,1.4545653661092122
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,1536,1,0,1.4628480275472004
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,1536,2,0,0.7706773281097412
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,1536,4,0,0.4257226785024007
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,1536,4,0,0.42893866697947186
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,1536,8,0,0.2469866673151652
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,1536,8,0,0.2456106742223104
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,1536,16,0,0.1692906618118286
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,1536,16,0,0.1960373322168986
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,1536,32,0,0.1413386662801107
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,1536,32,0,0.14857600132624307
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,1536,64,0,0.17001599073410034
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,1536,1,0,2.848965326944987
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,1536,64,0,0.141077329715093
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,1536,128,0,0.1564959983030955
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,1536,128,0,0.1253706713517507
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,1536,1,0,2.8576745986938477
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,1536,2,0,1.478490670522054
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,1536,16,0,0.28491199016571045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,1536,2,0,1.4766133626302083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,1536,4,0,0.7893653710683187
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,1536,32,0,0.18555732568105063
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,1536,4,0,0.7847466468811035
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,1536,8,0,0.4417119820912679
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,1536,8,0,0.4386399984359741
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,1536,16,0,0.2906186580657959
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,1536,32,0,0.19151467084884644
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,1536,64,0,0.14436800281206766
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,1536,64,0,0.18075732390085855
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,1536,128,0,0.12502933541933695
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,1536,2,0,2.90229860941569
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,1536,128,0,0.18147732814153036
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,1536,1,0,5.675253550211589
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,1536,1,0,5.642453511555989
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,1536,2,0,2.9133227666219077
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,1536,16,0,0.5222026507059733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,1536,4,0,1.5149866739908855
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,1536,4,0,1.5229759216308594
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,1536,8,0,0.826304038365682
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,1536,8,0,0.8398613135019938
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,1536,16,0,0.5202240149180094
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,1536,32,0,0.3277386625607808
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,1536,32,0,0.3319200078646342
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,1536,64,0,0.22975999116897583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,1536,64,0,0.22921067476272583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,1536,128,0,0.18025600910186768
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,1536,128,0,0.17918932437896729
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,1536,2,0,5.740053176879883
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,1536,1,0,11.2314453125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,1536,1,0,11.243802388509115
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,1536,4,0,2.9780906041463218
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,1536,2,0,5.718058904012044
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,1536,32,0,0.599178671836853
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,1536,4,0,2.980213483174642
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,1536,8,0,1.5914986928304036
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,1536,64,0,0.41484268506368
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,1536,8,0,1.586703936258952
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,1536,16,0,0.9798186620076498
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,1536,16,0,0.9787946542104086
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,1536,32,0,0.6012426614761353
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,1536,64,0,0.4102613528569539
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,1536,128,0,0.314463992913564
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,1536,128,0,0.3141439954439799
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,2048,1,0,0.579578677813212
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,2048,8,0,0.13666133085886636
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,2048,1,0,0.577018658320109
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,2048,2,0,0.3184640010197957
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,2048,2,0,0.3161440094312032
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,2048,4,0,0.1928053299585978
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,2048,32,0,0.15633066495259604
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,2048,4,0,0.1929653286933899
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,2048,8,0,0.14874666929244995
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,2048,16,0,0.13334932923316956
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,2048,16,0,0.12239467104276021
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,2048,32,0,0.1267093320687612
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,2048,64,0,0.16287466883659363
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,2048,1,0,1.0951039791107178
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,2048,2,0,0.5781013170878092
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,2048,64,0,0.15432533621788025
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,2048,128,0,0.17268800735473633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,2048,128,0,0.16356266538302103
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,2048,1,0,1.092741330464681
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,2048,2,0,0.5801599820454916
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,2048,4,0,0.3258773287137349
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,2048,4,0,0.32544533411661786
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,2048,8,0,0.1946186621983846
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,2048,8,0,0.20052266120910645
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,2048,64,0,0.14061333735783896
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,2048,16,0,0.17297599713007608
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,2048,16,0,0.16084266702334085
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,2048,32,0,0.1492800017197927
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,2048,32,0,0.15122666954994202
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,2048,64,0,0.1353653371334076
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,2048,2,0,1.1019413471221924
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,2048,128,0,0.1253866652647654
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,2048,128,0,0.15169599652290344
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,2048,1,0,2.129765351613363
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,2048,1,0,2.1261119842529297
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,2048,2,0,1.1075039704640706
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,2048,4,0,0.5905973513921102
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,2048,32,0,0.15591999888420105
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,2048,4,0,0.5996479988098145
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,2048,8,0,0.33908267815907794
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,2048,8,0,0.33431466420491535
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,2048,16,0,0.20770132541656494
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,2048,16,0,0.20949333906173706
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,2048,32,0,0.17166399955749512
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,2048,64,0,0.15901333093643188
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,2048,64,0,0.15582933028539023
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,2048,128,0,0.13005866607030234
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,2048,128,0,0.1455413301785787
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,2048,1,0,4.1468000411987305
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,2048,1,0,4.174490610758464
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,2048,2,0,2.136319955190023
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,2048,16,0,0.36114664872487384
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,2048,2,0,2.14846404393514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,2048,4,0,1.1205120086669922
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,2048,4,0,1.1245120366414387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,2048,8,0,0.6184426546096802
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,2048,8,0,0.6189333200454712
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,2048,16,0,0.3656800190607707
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,2048,32,0,0.25142399470011395
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,2048,32,0,0.25113600492477417
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,2048,64,0,0.18321067094802856
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,2048,64,0,0.17837866147359213
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,2048,128,0,0.17290133237838745
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,2048,128,0,0.13801599542299905
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,2048,1,0,8.289775848388672
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,2048,8,0,1.174346685409546
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,2048,8,0,1.1824426651000977
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,2048,1,0,8.287013371785482
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,2048,2,0,4.210943857828776
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,2048,2,0,4.195706685384114
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,2048,4,0,2.1821707089742026
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,2048,4,0,2.1763466199239097
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,2048,16,0,0.6729599634806315
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,2048,16,0,0.6731359958648682
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,2048,32,0,0.44791467984517414
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,2048,32,0,0.44396265347798664
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,2048,64,0,0.30766934156417847
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,2048,64,0,0.30638400713602704
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,2048,128,0,0.23340266942977905
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,2048,128,0,0.2332586646080017
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,32,2048,2,0,8.370437622070312
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,32,2048,1,0,16.457482655843098
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,32,2048,4,0,4.319082578023274
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,32,2048,2,0,8.320789337158203
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,32,2048,4,0,4.300325393676758
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,32,2048,32,0,0.8354079723358154
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,32,2048,1,0,16.48349889119466
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,32,2048,8,0,2.304464022318522
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,32,2048,8,0,2.2927680015563965
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,32,2048,64,0,0.5600533485412598
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,32,2048,16,0,1.2809546788533528
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,32,2048,16,0,1.2751466433207195
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,32,2048,32,0,0.8322827021280924
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,32,2048,64,0,0.5583039919535319
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,32,2048,128,0,0.4220053354899089
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,32,2048,128,0,0.42370132605234784
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,3072,4,0,0.29706132411956787
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,3072,1,0,0.9672746658325195
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,3072,1,0,0.9692746798197428
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,3072,2,0,0.5240746736526489
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,3072,2,0,0.528058648109436
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,3072,4,0,0.2956586678822835
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,3072,8,0,0.18547733624776205
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,3072,8,0,0.18899200359980264
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,3072,16,0,0.13380266229311624
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,3072,64,0,0.1511733333269755
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,3072,16,0,0.1301706631978353
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,3072,32,0,0.15171200037002563
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,3072,32,0,0.1502346694469452
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,3072,64,0,0.17011199394861856
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,3072,128,0,0.1388746698697408
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,3072,128,0,0.1423786679903666
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,3072,1,0,1.8633813858032227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,3072,1,0,1.874176025390625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,3072,2,0,0.9756800333658854
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,3072,2,0,0.9782400131225586
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,3072,16,0,0.19464532534281412
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,3072,4,0,0.5362079938252767
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,3072,4,0,0.5353920062383016
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,3072,8,0,0.3107466697692871
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,3072,8,0,0.308624009291331
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,3072,16,0,0.19484800100326538
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,3072,32,0,0.15803200006484985
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,3072,32,0,0.14958932995796204
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,3072,64,0,0.1495733360449473
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,3072,1,0,3.6780532201131186
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,3072,64,0,0.17916800578435263
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,3072,128,0,0.16234667102495828
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,3072,128,0,0.1609386702378591
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,3072,1,0,3.6589333216349282
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,3072,2,0,1.8817812601725261
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,3072,16,0,0.3293066620826721
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,3072,2,0,1.890394687652588
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,3072,4,0,0.9980426629384359
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,3072,4,0,1.0007786750793457
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,3072,8,0,0.552895983060201
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,3072,8,0,0.5502026478449503
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,3072,16,0,0.32945066690444946
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,3072,32,0,0.22712532679239908
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,3072,32,0,0.22466667493184408
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,3072,64,0,0.17829867204030356
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,3072,64,0,0.1701386570930481
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,3072,128,0,0.16755733887354532
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,3072,128,0,0.17646400133768717
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,3072,1,0,7.264837265014648
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,3072,2,0,3.7071946461995444
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,3072,8,0,1.0499733289082844
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,3072,1,0,7.260704040527344
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,3072,2,0,3.717013359069824
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,3072,4,0,1.9282666842142742
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,3072,4,0,1.9293546676635742
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,3072,8,0,1.046026627222697
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,3072,16,0,0.591759999593099
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,3072,16,0,0.5906399885813395
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,3072,32,0,0.39531199137369794
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,3072,32,0,0.3917493422826131
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,3072,64,0,0.2683680057525635
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,3072,64,0,0.26870934168497723
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,3072,128,0,0.2020639975865682
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,3072,128,0,0.20454400777816772
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,3072,1,0,14.443312327067057
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,3072,2,0,7.406272252400716
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,3072,8,0,2.003941377003988
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,3072,8,0,2.0053280194600425
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,3072,1,0,14.438196818033854
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,3072,2,0,7.318592071533203
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,3072,4,0,3.815135955810547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,3072,4,0,3.796293258666992
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,3072,16,0,1.1216426690419514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,3072,16,0,1.1211573282877605
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,3072,32,0,0.7165599664052328
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,3072,32,0,0.7179253101348877
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,3072,64,0,0.47551465034484863
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,3072,64,0,0.47596800327301025
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,3072,128,0,0.3551520109176636
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,3072,128,0,0.35628799597422284
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,4096,1,0,1.4664907455444336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,4096,1,0,1.470037301381429
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,4096,8,0,0.2634933392206828
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,4096,16,0,0.1736853321393331
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,4096,2,0,0.7750986417134603
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,4096,2,0,0.775381326675415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,4096,32,0,0.1681600014368693
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,4096,4,0,0.42659199237823486
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,4096,4,0,0.4293760061264038
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,4096,8,0,0.2590773304303487
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,4096,16,0,0.1763253410657247
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,4096,32,0,0.1637440025806427
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,4096,64,0,0.14261866609255472
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,4096,64,0,0.1692319909731547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,4096,128,0,0.1625226636727651
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,4096,128,0,0.1477120021979014
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,4096,1,0,2.847968101501465
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,4096,1,0,2.8765172958374023
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,4096,8,0,0.44356799125671387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,4096,16,0,0.2676266630490621
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,4096,2,0,1.473482608795166
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,4096,2,0,1.4850880304972331
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,4096,4,0,0.7868586381276449
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,4096,4,0,0.7906986872355143
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,4096,8,0,0.4394079844156901
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,4096,16,0,0.2723840077718099
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,4096,32,0,0.18226132790247598
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,4096,32,0,0.18249066670735678
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,4096,64,0,0.14936000108718872
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,4096,64,0,0.17784533898035684
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,4096,128,0,0.14070933063824972
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,4096,128,0,0.16779732704162598
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,4096,1,0,5.622106552124023
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,4096,1,0,5.623141606648763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,4096,8,0,0.8094346523284912
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,4096,2,0,2.856186548868815
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,4096,4,0,1.4988266626993816
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,4096,2,0,2.8737920125325522
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,4096,4,0,1.5089653333028157
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,4096,8,0,0.8104106585184733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,4096,64,0,0.21955200036366782
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,4096,16,0,0.4705333312352498
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,4096,16,0,0.47274665037790936
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,4096,32,0,0.2943039933840434
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,4096,32,0,0.29677865902582806
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,4096,64,0,0.22055999437967935
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,4096,1,0,11.156283060709635
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,4096,2,0,5.667338689168294
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,4096,4,0,2.903530756632487
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,4096,128,0,0.1732693314552307
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,4096,128,0,0.15801599621772766
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,4096,1,0,11.152058919270834
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,4096,2,0,5.6334184010823565
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,4096,32,0,0.5239200194676717
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,4096,4,0,2.918309211730957
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,4096,8,0,1.5361599922180176
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,4096,8,0,1.5448853174845378
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,4096,16,0,0.8645439942677816
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,4096,16,0,0.8693013191223145
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,4096,32,0,0.5255359808603922
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,4096,64,0,0.36961066722869873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,4096,64,0,0.3698933521906535
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,4096,128,0,0.2724799911181132
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,4096,128,0,0.27265600363413495
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,16,4096,2,0,11.231119791666666
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,16,4096,1,0,24.28343454996745
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,16,4096,4,0,5.769189198811849
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,16,4096,16,0,1.6456373532613118
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,16,4096,1,0,22.02397918701172
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,16,4096,2,0,11.240559895833334
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,16,4096,4,0,5.775429407755534
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,16,4096,8,0,3.0370187759399414
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,16,4096,8,0,3.038874626159668
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,16,4096,128,0,0.48205868403116864
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,16,4096,16,0,1.653648058573405
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,16,4096,32,0,0.9676907062530518
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,6144,1,0,2.689215977986654
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,16,4096,32,0,0.970421314239502
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,16,4096,64,0,0.666810671488444
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,16,4096,64,0,0.666757345199585
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,16,4096,128,0,0.4842613140741984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,6144,1,0,2.691989262898763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,6144,2,0,1.3977600733439128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,6144,2,0,1.4171627362569172
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,6144,4,0,0.7558826605478922
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,6144,4,0,0.7611626784006754
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,6144,8,0,0.42989333470662433
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,6144,8,0,0.4356906811396281
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,6144,16,0,0.2723626693089803
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,6144,16,0,0.26819199323654175
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,6144,32,0,0.1833440065383911
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,6144,32,0,0.1934986710548401
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,6144,64,0,0.1471680005391439
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,6144,64,0,0.17849600315093994
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,6144,128,0,0.1339413324991862
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,6144,4,0,1.4169440269470215
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,6144,128,0,0.17294400930404663
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,6144,1,0,5.309797286987305
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,6144,1,0,5.290330568949382
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,6144,2,0,2.7067785263061523
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,6144,2,0,2.722357432047526
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,6144,4,0,1.4259146054585774
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,6144,8,0,0.7698826789855957
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,6144,8,0,0.7720586458841959
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,6144,16,0,0.4531253178914388
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,6144,16,0,0.45639467239379883
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,6144,32,0,0.2885333299636841
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,6144,32,0,0.2868959903717041
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,6144,64,0,0.2147093415260315
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,6144,64,0,0.21916800737380981
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,6144,128,0,0.156549334526062
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,6144,128,0,0.16769067446390787
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,6144,1,0,10.662895838419596
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,6144,2,0,5.3571624755859375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,6144,1,0,10.565157572428385
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,6144,2,0,5.371402740478516
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,6144,4,0,2.760016123453776
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,6144,4,0,2.7644532521565757
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,6144,8,0,1.4665385882059734
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,6144,8,0,1.4644373257954915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,6144,16,0,0.8139359951019287
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,6144,128,0,0.25174933671951294
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,6144,128,0,0.25247466564178467
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,6144,16,0,0.8200960159301758
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,6144,32,0,0.49722135066986084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,6144,32,0,0.49369601408640545
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,6144,64,0,0.3402719895044963
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,6144,1,0,21.108314514160156
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,6144,2,0,10.57802645365397
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,6144,4,0,5.4418989817301435
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,6144,64,0,0.3452479839324951
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,6144,1,0,20.90852228800456
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,6144,16,0,1.5368960698445637
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,6144,2,0,10.710219065348307
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,6144,4,0,5.434426625569661
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,6144,64,0,0.6028586626052856
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,6144,8,0,2.8137280146280923
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,6144,16,0,1.5466453234354656
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,6144,8,0,2.8272425333658853
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,6144,32,0,0.902346690495809
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,6144,32,0,0.9021493593851725
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,6144,64,0,0.6053920189539591
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,6144,128,0,0.4280480146408081
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,6144,128,0,0.4288533528645833
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,8192,1,0,4.348437309265137
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,8192,1,0,4.361776034037272
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,8192,8,0,0.6504000027974447
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,8192,2,0,2.2228906949361167
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,8192,2,0,2.2307680447896323
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,8192,4,0,1.1812160015106201
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,8192,4,0,1.1759040355682373
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,8192,8,0,0.6495039860407511
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,8192,16,0,0.49156800905863446
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,8192,128,0,0.1943839987119039
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,8192,16,0,0.39112532138824463
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,8192,32,0,0.2576693296432495
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,8192,32,0,0.2622293432553609
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,8192,64,0,0.17753599087397257
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,8192,64,0,0.17469332615534464
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,8192,128,0,0.1567573348681132
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,8192,1,0,8.579013188680014
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,8192,1,0,8.569210688273111
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,8192,2,0,4.325818697611491
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,8192,2,0,4.346698760986328
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,8192,4,0,2.2482239405314126
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,8192,4,0,2.252240022023519
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,8192,8,0,1.1977759997049968
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,8192,8,0,1.2015573183695476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,8192,64,0,0.28597333033879596
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,8192,16,0,0.686954657236735
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,8192,16,0,0.687274694442749
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,8192,32,0,0.41785601774851483
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,8192,32,0,0.4135040044784546
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,8192,64,0,0.2799466649691264
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,8192,128,0,0.44458667437235516
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,8192,128,0,0.21277866760889688
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,8192,1,0,17.192378997802734
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,8192,8,0,2.2958827018737793
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,8192,2,0,8.594634373982748
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,8192,16,0,1.2544106642405193
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,8192,1,0,17.386389414469402
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,8192,2,0,8.700234731038412
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,8192,4,0,4.391567866007487
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,8192,4,0,4.429098765055339
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,8192,8,0,2.3000426292419434
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,8192,16,0,1.249125321706136
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,8192,128,0,0.350106676419576
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,8192,32,0,0.7417546908060709
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,8192,32,0,0.7321493625640869
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,8192,64,0,0.4721706708272298
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,8192,64,0,0.4766133228937785
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,8192,128,0,0.34969067573547363
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,8,8192,2,0,17.3611577351888
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,8,8192,2,0,17.006484985351562
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,8,8192,1,0,34.31911977132162
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,8,8192,4,0,8.829429626464844
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,8,8192,1,0,34.48777516682943
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,8,8192,4,0,8.828618367513021
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,8,8192,8,0,4.534042676289876
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,8,8192,8,0,4.504650751749675
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,8,8192,16,0,2.4073492685953775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,8,8192,16,0,2.401007970174154
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,8,8192,32,0,1.3640799522399902
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,8,8192,32,0,1.3622454007466633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,8,8192,64,0,0.8466026782989502
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,8,8192,64,0,0.8499840100606283
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,8,8192,128,0,0.6081653436024984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,8,8192,128,0,0.6070719957351685
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,10240,1,0,6.402650833129883
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,10240,2,0,3.218181292215983
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,10240,1,0,6.403647740681966
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,10240,2,0,3.230602582295736
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,10240,4,0,1.6770505905151367
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,10240,32,0,0.33635199069976807
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,10240,4,0,1.6919199625651042
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,10240,8,0,0.9159039656321207
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,10240,128,0,0.17524800697962442
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,10240,8,0,0.9118560155232748
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,10240,16,0,0.5272853374481201
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,10240,16,0,0.5348480145136515
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,10240,32,0,0.34065600236256915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,10240,64,0,0.23635733127593994
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,10240,64,0,0.2487679918607076
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,10240,2,0,6.439151763916016
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,10240,128,0,0.18460800250371298
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,10240,1,0,12.940480550130209
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,10240,2,0,6.3920853932698565
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,10240,1,0,13.185860951741537
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,10240,4,0,3.252565383911133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,10240,16,0,0.9538826942443848
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,10240,32,0,0.5651573340098063
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,10240,4,0,3.2706772486368814
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,10240,8,0,1.7369866371154785
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,10240,8,0,1.7242026329040527
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,10240,128,0,0.2765973409016927
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,10240,16,0,0.9539413452148438
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,10240,32,0,0.562719980875651
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,10240,64,0,0.3713013331095378
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,10240,64,0,0.37214934825897217
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,10240,2,0,12.959701538085938
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,10240,4,0,6.372751871744792
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,10240,128,0,0.2804853320121765
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,10240,2,0,13.104836781819662
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,10240,16,0,1.7905227343241374
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,10240,1,0,25.35491180419922
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,10240,1,0,25.601216634114582
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,10240,4,0,6.427455902099609
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,10240,8,0,3.3112640380859375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,10240,64,0,0.6405280033747355
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,10240,128,0,0.45768535137176514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,10240,8,0,3.3169387181599936
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,10240,16,0,1.7754400571187336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,10240,32,0,1.0192480087280273
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,10240,32,0,1.0087947050730388
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,10240,64,0,0.6389866669972738
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,10240,128,0,0.4596853256225586
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,12288,1,0,8.764415740966797
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,12288,2,0,4.41268793741862
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,12288,1,0,8.76966921488444
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,12288,2,0,4.384559949239095
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,12288,4,0,2.2984159787495932
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,12288,4,0,2.280874729156494
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,12288,8,0,1.2348053455352783
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,12288,8,0,1.231706698735555
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,12288,16,0,0.7007306416829427
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,12288,16,0,0.7026879787445068
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,12288,128,0,0.20637865861256918
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,12288,32,0,0.4337759812672933
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,12288,32,0,0.4284426768620809
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,12288,64,0,0.2967626651128133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,12288,2,0,8.68614387512207
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,12288,64,0,0.2943360010782878
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,12288,128,0,0.20941332976023355
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,12288,1,0,17.843616485595703
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,12288,8,0,2.3221119244893393
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,12288,1,0,17.79635747273763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,12288,2,0,8.79965845743815
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,12288,4,0,4.485109329223633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,12288,8,0,2.342367966969808
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,12288,4,0,4.485909461975098
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,12288,16,0,1.266693353652954
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,12288,16,0,1.2766773700714111
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,12288,128,0,0.34461331367492676
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,12288,32,0,0.7339466412862142
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,12288,32,0,0.7370773156483968
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,12288,64,0,0.4713013172149658
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,12288,64,0,0.46619200706481934
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,12288,128,0,0.35205332438151044
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,12288,2,0,17.874197642008465
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,12288,1,0,35.33052317301432
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,12288,4,0,9.29693857828776
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,12288,16,0,2.405029296875
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,12288,2,0,17.96836217244466
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,12288,1,0,36.180102030436196
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,12288,4,0,9.116618474324545
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,12288,8,0,4.5672000249226885
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,12288,8,0,4.536101341247559
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,12288,16,0,2.4221067428588867
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,12288,32,0,1.3558133443196614
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,12288,32,0,1.3489599227905273
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,12288,64,0,0.8157280286153158
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,12288,64,0,0.8265706698099772
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,12288,128,0,0.5787786642710367
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,12288,128,0,0.5753866831461588
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,16384,1,0,15.10595703125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,16384,2,0,7.869973500569661
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,16384,1,0,14.727962493896484
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,16384,2,0,7.54745610555013
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,16384,4,0,3.747685432434082
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,16384,4,0,3.789344151814779
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,16384,8,0,1.9807146390279133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,16384,8,0,1.9807039896647136
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,16384,16,0,1.0940159956614177
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,16384,16,0,1.0953760147094727
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,16384,32,0,0.6570239861806234
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,16384,32,0,0.6491466760635376
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,16384,64,0,0.4254240194956462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,16384,64,0,0.44099199771881104
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,16384,128,0,0.29687466224034625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,16384,128,0,0.29230932394663495
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,16384,2,0,15.005018870035807
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,16384,1,0,30.2838134765625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,16384,2,0,15.277109781901041
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,16384,4,0,7.507274627685547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,16384,1,0,30.472117106119793
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,16384,4,0,7.5077972412109375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,16384,8,0,3.8143787384033203
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,16384,8,0,3.822655995686849
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,16384,16,0,2.038047949473063
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,16384,16,0,2.0434346199035645
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,16384,32,0,1.1378026803334553
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,16384,32,0,1.1546506881713867
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,16384,64,0,0.7065172990163168
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,16384,64,0,0.7057120005289713
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,16384,128,0,0.47818132241566974
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,16384,128,0,0.48263466358184814
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,4,16384,4,0,15.520464579264322
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,4,16384,2,0,29.895179748535156
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,4,16384,2,0,30.76018778483073
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,4,16384,4,0,14.85748291015625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,4,16384,8,0,7.663920084635417
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,4,16384,8,0,7.823792139689128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,4,16384,32,0,2.1348373095194497
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,4,16384,1,0,60.68294779459635
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,4,16384,16,0,3.8991521199544272
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,4,16384,16,0,3.9281813303629556
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,4,16384,1,0,60.61380513509115
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,4,16384,32,0,2.1152159372965493
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,4,16384,64,0,1.26146133740743
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,4,16384,64,0,1.2550933361053467
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,4,16384,128,0,0.826469341913859
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,4,16384,128,0,0.8249493439992269
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,1,32768,4,0,14.201141357421875
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,1,32768,2,0,28.196533203125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,1,32768,2,0,28.408292134602863
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,1,32768,1,0,56.532735188802086
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,1,32768,16,0,3.6250667572021484
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,1,32768,8,0,6.962026596069336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,1,32768,4,0,14.8853759765625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,1,32768,8,0,6.927114486694336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,1,32768,16,0,3.612314542134603
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,1,32768,1,0,56.51093037923177
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,1,32768,32,0,1.959173361460368
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,1,32768,32,0,1.9551787376403809
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,1,32768,64,0,1.1468640168507893
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,1,32768,64,0,1.1446773211161296
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,1,32768,128,0,0.7343253294626871
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,1,32768,128,0,0.7386079629262289
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,32,2,32768,4,0,28.383140563964844
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,16,2,32768,8,0,17.821083068847656
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,32,2,32768,4,0,28.39007568359375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,16,2,32768,8,0,14.280143737792969
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,64,2,32768,2,0,56.66387939453125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,8,2,32768,16,0,11.107264200846354
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,4,2,32768,32,0,3.6944586435953775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,2,2,32768,64,0,2.0597972869873047
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,64,2,32768,2,0,56.2836659749349
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,8,2,32768,16,0,7.171525319417317
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,4,2,32768,32,0,3.7119948069254556
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,2,2,32768,64,0,2.069904009501139
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,128,2,32768,1,0,111.94261678059895
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flash_attn_mla,float16,float16,1,2,32768,128,0,1.261189301808675
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,128,2,32768,1,0,112.42818196614583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,context_mla,vllm_flashmla,float16,fp8,1,2,32768,128,0,1.2595146497090657
