framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,1,0.13211199641227722
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,1,0.10268800457318623
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,1,0.1295253336429596
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,1,0.10043199857076009
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,1,0.14892799655596414
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,1,0.13543466726938883
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,1,0.1328480045000712
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,1,0.09784000118573506
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,1,0.14680000146230063
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,1,0.1678239901860555
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,1,0.15330666303634644
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,1,0.13217600186665854
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,1,0.10114666819572449
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,1,0.10149866342544556
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,1,0.10494933525721233
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,1,0.16496533155441284
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,1,0.1316266655921936
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,1,0.1113866666952769
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,1,0.1973386605580648
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,1,0.09818666179974873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,1,0.13461866974830627
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,1,0.0988106628259023
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,1,0.166485329469045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,1,0.1325440009435018
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,1,0.12310933073361714
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,1,0.19760533173878989
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,1,0.10122666756312053
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,1,0.12587199608484903
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,1,0.10286399722099304
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,1,0.12453333536783855
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,1,0.12733866771062216
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,1,0.12091199556986491
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,1,0.09876799583435059
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,1,0.12276267011960347
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,1,0.1539359986782074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,1,0.1932906707127889
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,1,0.12959999839464822
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,1,0.19894933700561523
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,1,0.13224533200263977
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,1,0.12900267044703165
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,1,0.1546453336874644
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,1,0.12460800011952718
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,1,0.1578933298587799
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,1,0.15691733360290527
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,1,0.1937546730041504
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,1,0.20131200551986694
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,1,0.15415466825167337
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,1,0.16192533572514853
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,1,0.09639466802279155
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,1,0.1376159985860189
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,1,0.15870933731396994
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,1,0.15237333377202353
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,1,0.1644373337427775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,1,0.16077333688735962
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,1,0.15646933515866598
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,1,0.11991999546686809
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,1,0.20155733823776245
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,1,0.15520000457763672
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,1,0.11972799897193909
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,1,0.12389866511027019
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,1,0.19670933485031128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,1,0.15946666399637857
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,1,0.1530400017897288
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,1,0.1924533247947693
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,1,0.1058079997698466
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,1,0.1959786613782247
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,1,0.09780266880989075
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,1,0.12524799505869547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,1,0.09763733545939128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,1,0.12405332922935486
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,1,0.10387200117111206
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,1,0.1299679974714915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,1,0.10002133250236511
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,1,0.20003734032313028
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,1,0.1060693363348643
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,1,0.12407466769218445
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,1,0.10083199540774028
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,1,0.1216266651948293
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,1,0.1516853372255961
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,1,0.2081706722577413
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,1,0.16230400403340658
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,1,0.12125333150227864
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,1,0.09563733140627544
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,1,0.09938666224479675
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,1,0.12155200044314067
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,1,0.09814400474230449
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,1,0.12612266341845194
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,1,0.09925333658854167
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,1,0.16577066977818808
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,1,0.12492266297340393
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,1,0.10744000474611919
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,1,0.13864533106486002
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,1,0.09940800070762634
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,1,0.12867732842763266
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,1,0.09674132863680522
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,1,0.12667733430862427
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,1,0.10444266597429912
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,1,0.12418133020401001
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,1,0.09219732880592346
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,1,0.24713067213694254
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,1,0.10302933057149251
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,1,0.09970133503278096
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,1,0.1267306705315908
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,1,0.09885866443316142
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,1,0.1253439982732137
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,1,0.14849066734313965
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,1,0.15537066260973612
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,1,0.12958932916323343
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,1,0.14829867084821066
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,1,0.19562133153279623
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,1,0.12091199556986491
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,1,0.197978675365448
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,1,0.1607200006643931
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,1,0.12809066971143088
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,1,0.09803199768066406
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,1,0.1284266710281372
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,1,0.09934399525324504
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,1,0.09869866569836934
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,1,0.12994133432706198
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,1,0.09693866968154907
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,1,0.1237600048383077
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,1,0.12096533179283142
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,1,0.15552533666292825
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,1,0.10457066694895427
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,1,0.15969066818555197
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,1,0.1966879963874817
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,1,0.14542933305104574
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,1,0.158160001039505
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,1,0.15891200304031372
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,1,0.12263466914494832
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,1,0.19878933827082315
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,1,0.09919466574986775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,1,0.19856532414754233
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,1,0.1291306714216868
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,1,0.10692266623179118
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,1,0.15319466590881348
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,1,0.13471466302871704
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,1,0.21035200357437134
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,1,0.12081600228945415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,1,0.15678933262825012
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,1,0.16406933466593424
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,1,0.16326933105786642
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,1,0.1362613340218862
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,1,0.15547200043996176
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,1,0.40212265650431317
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,1,0.3459306557973226
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,1,0.1801919937133789
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,1,0.13315733273824057
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,1,0.21435733636220297
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,1,0.14497066537539163
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,1,0.10244266192118327
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,1,0.16171200076738992
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,1,0.24503999948501587
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,1,0.14867200454076132
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,1,0.09639466802279155
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,1,0.12495999534924825
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,1,0.20578134059906006
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,1,0.1048426628112793
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,1,0.10310399532318115
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,1,0.13358400265375772
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,1,0.3280533353487651
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,1,0.6497546831766764
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,1,0.17857599258422852
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,1,0.4042400121688843
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,1,0.11429333686828613
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,1,0.2007946570714315
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,1,0.24569066365559897
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,1,0.21730667352676392
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,1,0.27321066459019977
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,1,0.1639306644598643
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,1,0.13889066378275552
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,1,0.15980799992879233
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,3,0.09445333480834961
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,3,0.15626666943232217
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,1,0.20932799577713013
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,1,0.12874133388201395
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,3,0.11874133348464966
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,3,0.1258026659488678
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,3,0.11751466989517212
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,3,0.14456533392270407
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,1,0.15412267049153647
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,3,0.1675893266995748
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,1,0.1951893369356791
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,3,0.1869279940923055
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,3,0.150709331035614
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,3,0.1520639955997467
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,3,0.14864533146222433
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,3,0.15760533014933267
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,3,0.15717867016792297
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,3,0.1969119906425476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,3,0.11819199721018474
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,3,0.18676799535751343
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,3,0.09451733032862346
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,3,0.1265066663424174
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,3,0.20197333892186484
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,3,0.19236799081166586
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,3,0.11943466464678447
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,3,0.1850879987080892
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,3,0.11333866914113362
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,3,0.18289599816004434
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,3,0.1651893357435862
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,3,0.15389866630236307
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,3,0.15064000089963278
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,3,0.19404266277949014
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,3,0.12265599767367046
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,3,0.18259199460347494
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,3,0.15008533000946045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,3,0.15246933698654175
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,3,0.09524800380071004
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,3,0.12398399909337361
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,3,0.15004799763361612
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,3,0.16605866948763529
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,3,0.16351466377576193
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,3,0.12788266936937967
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,3,0.12266133228937785
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,3,0.15454933047294617
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,3,0.14829333623250326
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,3,0.19828800360361734
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,3,0.14970133701960245
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,3,0.15870933731396994
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,3,0.16435733437538147
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,3,0.19727466503779092
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,3,0.11713066697120667
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,3,0.15533333023389181
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,3,0.15762666861216226
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,3,0.12346133589744568
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,3,0.14819199840227762
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,3,0.16133866707483926
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,3,0.1660160024960836
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,3,0.19713066021601358
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,3,0.11656000216801961
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,3,0.15406933426856995
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,3,0.14574933052062988
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,3,0.1914880077044169
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,3,0.1548799971739451
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,3,0.15129066507021585
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,3,0.1564906636873881
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,3,0.16411200165748596
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,3,0.19385600090026855
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,3,0.11580800016721089
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,3,0.13909332950909933
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,3,0.13303466637929282
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,3,0.09845866759618123
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,3,0.1156213382879893
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,3,0.1275200049082438
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,3,0.11809066931406657
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,3,0.14202133814493814
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,3,0.1532853345076243
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,3,0.154831995566686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,3,0.1612106661001841
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,3,0.15158933401107788
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,3,0.15129066507021585
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,3,0.2068693240483602
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,3,0.14655466874440512
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,3,0.0962666670481364
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,3,0.15710399548212686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,3,0.19641600052515665
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,3,0.09546132882436116
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,3,0.15639467040697733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,3,0.12502933541933695
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,3,0.09939199686050415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,3,0.19645865758260092
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,3,0.1495199998219808
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,3,0.12853333353996277
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,3,0.16062933206558228
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,3,0.12307733297348022
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,3,0.15205867091814676
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,3,0.11888532837231953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,3,0.19634666045506796
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,3,0.18950400749842325
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,3,0.15196800231933594
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,3,0.16274133324623108
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,3,0.10058133800824483
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,3,0.12626666824022928
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,3,0.11609599987665813
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,3,0.12476266423861186
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,3,0.09802132844924927
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,3,0.19476266702016196
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,3,0.1502026617527008
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,3,0.1593706707159678
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,3,0.12924800316492716
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,3,0.12785599629084268
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,3,0.12543466687202454
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,3,0.15276267131169638
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,3,0.12286399801572163
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,3,0.1927093267440796
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,3,0.15218666195869446
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,3,0.15426666537920633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,3,0.1590773363908132
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,3,0.15708800156911215
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,3,0.1482186714808146
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,3,0.1029866635799408
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,3,0.1313759982585907
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,3,0.15307199954986572
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,3,0.1982133388519287
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,3,0.12404800454775493
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,3,0.12941333651542664
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,3,0.15625066558519998
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,3,0.15330666303634644
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,3,0.1518400013446808
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,3,0.1535253326098124
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,3,0.1909866730372111
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,3,0.14867732922236124
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,3,0.15576533476511636
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,3,0.16078933080037436
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,3,0.19513066609700522
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,3,0.14988799889882407
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,3,0.16351999839146933
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,3,0.11001066366831462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,3,0.1283253331979116
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,3,0.09465066591898601
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,3,0.15245333313941956
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,3,0.12088533242543538
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,3,0.19749333461125693
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,3,0.15913599729537964
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,3,0.27187200387318927
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,3,0.196234663327535
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,3,0.12020799517631531
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,3,0.15310933192571005
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,3,0.34907201925913495
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,3,0.15172800421714783
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,3,0.32832000652949017
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,3,0.1882986625035604
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,3,0.21276267369588217
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,3,0.11994133392969768
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,3,0.15607999761899313
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,3,0.15278933445612589
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,3,0.19433599710464478
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,3,0.12150399883588155
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,3,0.156741331020991
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,3,0.1446293294429779
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,3,0.16320000092188516
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,3,0.12004799644152324
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,3,0.16702399651209512
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,3,0.15340266625086466
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,3,0.1917333404223124
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,3,0.3278613289197286
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,3,0.6488639911015829
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,3,0.1768640081087748
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,3,0.40192532539367676
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,3,0.13727466265360513
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,3,0.2674186627070109
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,3,0.155130664507548
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,3,0.20601600408554077
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,3,0.15747732917467752
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,3,0.2055093248685201
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,3,0.16098666191101074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,7,0.12350933750470479
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,3,0.17846399545669556
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,3,0.20241065820058188
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,3,0.15832533439000449
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,7,0.15107199549674988
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,7,0.20641599098841348
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,3,0.15979199608167013
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,7,0.19199466705322266
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,7,0.16244266430536905
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,3,0.2099306583404541
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,7,0.19371734062830606
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,7,0.12087466319402058
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,7,0.19470399618148804
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,7,0.14894933501879373
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,7,0.19000534216562906
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,7,0.14898133277893066
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,7,0.1959999998410543
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,7,0.14713600277900696
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,7,0.1746506690979004
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,7,0.1197653313477834
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,7,0.19539733727773032
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,7,0.09637866417566936
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,7,0.11795199910799663
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,7,0.19944000244140625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,7,0.14879467089970908
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,7,0.12290666500727336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,7,0.20254399379094443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,7,0.14856533209482828
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,7,0.17144532998402914
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,7,0.20016533136367798
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,7,0.1490559975306193
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,7,0.15338133772214255
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,7,0.10055466492970784
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,7,0.1546293298403422
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,7,0.1874720056851705
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,7,0.1483573317527771
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,7,0.14898666739463806
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,7,0.17157334089279175
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,7,0.12370133399963379
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,7,0.1183093289534251
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,7,0.15240533153216043
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,7,0.15432533621788025
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,7,0.1879253387451172
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,7,0.14268799622853598
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,7,0.1909066637357076
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,7,0.15503999590873718
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,7,0.19356799125671387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,7,0.11804800232251485
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,7,0.09539199868837993
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,7,0.15270400047302246
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,7,0.1490506629149119
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,7,0.1919040083885193
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,7,0.12265066305796306
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,7,0.15077333648999533
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,7,0.19269333283106485
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,7,0.12416533629099528
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,7,0.20778665939966837
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,7,0.1197706659634908
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,7,0.20055466890335083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,7,0.15769599874814352
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,7,0.14645866552988687
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,7,0.15396266182263693
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,7,0.1515679955482483
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,7,0.19739200671513876
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,7,0.15497066577275595
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,7,0.2002026637395223
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,7,0.1390613317489624
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,7,0.1990399956703186
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,7,0.15225066741307577
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,7,0.20400534073511759
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,7,0.1179200013478597
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,7,0.15477333466211954
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,7,0.12411733468373616
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,7,0.1986400087674459
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,7,0.15448533495267233
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,7,0.203658660252889
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,7,0.14293866356213888
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,7,0.158869336048762
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,7,0.15333867073059082
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,7,0.15556800365447998
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,7,0.20374399423599243
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,7,0.15702399611473083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,7,0.20088533560434976
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,7,0.19580266873041788
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,7,0.15813333789507547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,7,0.19928000370661417
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,7,0.10217066605885823
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,7,0.12391466895739238
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,7,0.15731733043988547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,7,0.10993599891662598
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,7,0.15759467085202536
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,7,0.12494400143623352
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,7,0.19827200969060263
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,7,0.12232533097267151
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,7,0.17519466082255045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,7,0.11828800042470296
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,7,0.20089600483576456
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,7,0.15317866206169128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,7,0.15440533558527628
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,7,0.12505599856376648
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,7,0.19537599881490073
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,7,0.09738133351008098
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,7,0.12398933370908101
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,7,0.1164533297220866
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,7,0.19770665963490805
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,7,0.15476266543070474
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,7,0.1980959971745809
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,7,0.12293333808581035
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,7,0.18253332376480103
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,7,0.11844799915949504
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,7,0.17681600650151572
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,7,0.1251359979311625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,7,0.19739733139673868
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,7,0.12060266733169556
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,7,0.1577173372109731
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,7,0.12154666582743327
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,7,0.20035199324289957
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,7,0.19223467508951822
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,7,0.09994666775067647
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,7,0.1986666719118754
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,7,0.12802666425704956
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,7,0.12057066957155864
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,7,0.12372266252835591
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,7,0.11866666873296101
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,7,0.11970133582750957
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,7,0.157258669535319
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,7,0.19041067361831665
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,7,0.12204800049463908
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,7,0.15600533286730447
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,7,0.15220266580581665
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,7,0.20493866999944052
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,7,0.20533865690231323
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,7,0.1553600033124288
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,7,0.1669386625289917
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,7,0.1978293259938558
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,7,0.12001066406567891
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,7,0.15522666772206625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,7,0.12481066584587097
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,7,0.11940800150235494
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,7,0.19755200544993082
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,7,0.12174399693806966
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,7,0.14893333117167154
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,7,0.156058669090271
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,7,0.20094933112462363
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,7,0.1186293363571167
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,7,0.20006400346755981
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,7,0.12036266922950745
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,7,0.1267733375231425
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,7,0.1527359982331594
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,7,0.3285760084788005
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,7,0.3495519955952962
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,7,0.18995199600855509
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,7,0.21358400583267212
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,7,0.156549334526062
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,7,0.1609653333822886
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,7,0.15151466925938925
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,7,0.1639199952284495
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,7,0.11732266346613567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,7,0.1525973379611969
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,7,0.15456533432006836
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,7,0.20220800240834555
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,7,0.12190933028856914
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,7,0.19643733898798624
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,7,0.12349866827329
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,7,0.15189866224924722
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,7,0.33989866574605304
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,7,0.6508053143819174
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,7,0.18171733617782593
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,7,0.4026399850845337
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,7,0.1395199994246165
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,7,0.1628213326136271
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,7,0.208570659160614
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,7,0.2604106664657593
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,7,0.12193066875139873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,7,0.1899306575457255
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,7,0.15635200341542563
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,7,0.16380266348520914
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,15,0.146314670642217
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,15,0.18627200524012247
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,7,0.1602186659971873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,15,0.116047998269399
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,15,0.14637333154678345
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,15,0.1184266706307729
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,7,0.1644000013669332
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,7,0.16735466321309408
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,15,0.16015467047691345
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,7,0.17638399203618368
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,15,0.12059199810028076
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,15,0.1845973332722982
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,15,0.11513599753379822
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,15,0.15285332997639975
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,15,0.11395733555157979
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,15,0.09371733665466309
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,15,0.19073599576950073
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,15,0.11760532855987549
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,15,0.16033599774042764
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,15,0.11883733669916789
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,15,0.1553653379281362
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,15,0.12331733107566833
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,15,0.14787733554840088
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,15,0.19485867023468018
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,15,0.1536853313446045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,15,0.189082662264506
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,15,0.12085333466529846
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,15,0.15520000457763672
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,15,0.12076266606648763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,15,0.15401066342989603
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,15,0.1455359955628713
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,15,0.19216533501942953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,15,0.15226667126019797
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,15,0.149125337600708
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,15,0.19859200716018677
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,15,0.1929653286933899
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,15,0.12320533394813538
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,15,0.15427199999491373
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,15,0.09264533718427022
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,15,0.13478400309880575
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,15,0.19613333543141684
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,15,0.1513920029004415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,15,0.14910399913787842
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,15,0.19242133696873984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,15,0.11854933698972066
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,15,0.12940800189971924
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,15,0.15691733360290527
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,15,0.12139733632405598
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,15,0.16174399852752686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,15,0.15079999963442484
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,15,0.15409066279729208
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,15,0.19549334049224854
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,15,0.1922453244527181
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,15,0.09697066744168599
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,15,0.13770133256912231
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,15,0.15689599514007568
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,15,0.12309867143630981
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,15,0.12275733550389607
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,15,0.1933493415514628
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,15,0.1532799998919169
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,15,0.19526400168736777
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,15,0.15252799789110819
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,15,0.1341973344484965
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,15,0.1994719902674357
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,15,0.12257066369056702
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,15,0.16362133622169495
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,15,0.15082133809725443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,15,0.19584532578786215
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,15,0.10050132870674133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,15,0.12625599900881448
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,15,0.11336533228556316
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,15,0.19058134158452353
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,15,0.12999999523162842
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,15,0.12505599856376648
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,15,0.15668800473213196
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,15,0.141893337170283
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,15,0.1950719952583313
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,15,0.1497813363869985
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,15,0.15043200055758157
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,15,0.19604800144831339
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,15,0.1290826698144277
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,15,0.1699946721394857
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,15,0.12170132994651794
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,15,0.1961173415184021
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,15,0.10117333134015401
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,15,0.13051199913024902
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,15,0.1926986575126648
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,15,0.13884266217549643
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,15,0.129013329744339
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,15,0.11390399932861328
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,15,0.15821333726247153
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,15,0.12933866182963052
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,15,0.12075733145078023
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,15,0.15732266505559286
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,15,0.2053013245264689
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,15,0.12181333700815837
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,15,0.18406933546066284
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,15,0.12243200341860454
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,15,0.20508267482121786
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,15,0.1561973293622335
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,15,0.09698133667310078
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,15,0.12380799651145935
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,15,0.1587999959786733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,15,0.1530080040295919
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,15,0.1276800036430359
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,15,0.09607999523480733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,15,0.20731200774510702
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,15,0.20166399081548056
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,15,0.15035200119018555
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,15,0.15858667095502219
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,15,0.15342400471369425
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,15,0.09725866715113322
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,15,0.16061866283416748
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,15,0.1316426694393158
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,15,0.19545066356658936
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,15,0.14641066392262778
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,15,0.18538665771484375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,15,0.15362667044003805
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,15,0.16098666191101074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,15,0.14883733789126077
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,15,0.13108799854914346
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,15,0.12425600488980611
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,15,0.15410666664441428
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,15,0.1567306617895762
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,15,0.15152000387509665
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,15,0.16173332929611206
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,15,0.13868266344070435
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,15,0.18769599994023642
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,15,0.12661866346995035
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,15,0.1567626694838206
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,15,0.12562132875124613
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,15,0.19982399543126425
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,15,0.1609333356221517
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,15,0.21113600333531699
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,15,0.13833066821098328
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,15,0.18814400831858316
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,15,0.12888532876968384
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,15,0.20407466093699136
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,15,0.15822399655977884
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,15,0.12841066718101501
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,15,0.12264000376065572
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,15,0.15481066703796387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,15,0.16085867087046304
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,15,0.18929600715637207
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,15,0.1307146648565928
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,15,0.1989013353983561
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,15,0.12282666563987732
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,15,0.1536960005760193
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,15,0.32813332478205365
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,15,0.3513919909795125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,15,0.18987733125686646
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,15,0.21599467595418295
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,15,0.1332319974899292
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,15,0.20109333594640097
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,15,0.15194666385650635
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,15,0.1240000029404958
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,15,0.2022613286972046
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,15,0.19174933433532715
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,15,0.1593706707159678
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,15,0.19368533293406168
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,15,0.15799466768900552
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,15,0.19874133666356406
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,15,0.15174933274586996
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,15,0.19776000579198202
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,15,0.39310399691263836
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,15,0.3246346712112427
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,15,0.6581013202667236
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,15,0.16038933396339417
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,15,0.2627306580543518
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,15,0.14261333147684732
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,15,0.18436266978581747
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,15,0.19019200404485068
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,15,0.11246933539708455
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,15,0.19901333252588907
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,15,0.1914666692415873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,31,0.09459199508031209
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,15,0.18664532899856567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,31,0.1530506710211436
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,31,0.11688533425331116
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,15,0.16140266259511313
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,31,0.19449067115783691
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,31,0.154341330130895
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,15,0.2046239972114563
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,15,0.1665226618448893
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,31,0.194650669892629
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,15,0.20133866866429648
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,31,0.15586666266123453
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,31,0.19404800732930502
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,31,0.15847466389338175
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,31,0.1968266765276591
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,31,0.12158399820327759
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,31,0.19312532742818198
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,31,0.12230933705965678
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,31,0.15871999661127725
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,31,0.15160000324249268
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,31,0.19688000281651816
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,31,0.10368000467618306
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,31,0.12411733468373616
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,31,0.10974933703740437
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,31,0.18804800510406494
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,31,0.14698132872581482
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,31,0.16219733158747354
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,31,0.12452800075213115
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,31,0.19631999731063843
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,31,0.14889599879582724
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,31,0.19216533501942953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,31,0.16185599565505981
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,31,0.1973386605580648
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,31,0.1482080022493998
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,31,0.15973866979281107
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,31,0.12111467123031616
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,31,0.19509865840276083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,31,0.10458133618036906
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,31,0.12191466490427653
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,31,0.16383999586105347
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,31,0.20163200298945108
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,31,0.1492586632569631
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,31,0.1916373372077942
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,31,0.11918399731318156
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,31,0.19332265853881836
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,31,0.1529866655667623
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,31,0.1442293326059977
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,31,0.1585653324921926
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,31,0.19328000148137411
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,31,0.1488533318042755
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,31,0.16385599970817566
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,31,0.11860266327857971
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,31,0.20107199748357138
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,31,0.10002133250236511
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,31,0.161189337571462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,31,0.15684800346692404
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,31,0.1960373322168986
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,31,0.11848533153533936
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,31,0.1964906652768453
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,31,0.12461333473523457
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,31,0.19352000951766968
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,31,0.19500799973805746
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,31,0.14217066764831543
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,31,0.2071253259976705
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,31,0.1536853313446045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,31,0.19799466927846274
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,31,0.11931733290354411
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,31,0.19119999806086221
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,31,0.1232319970925649
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,31,0.19060800472895303
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,31,0.10145066181818645
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,31,0.131221334139506
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,31,0.1588640014330546
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,31,0.09874666730562846
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,31,0.154831995566686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,31,0.14781866470972696
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,31,0.1629759967327118
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,31,0.15131200353304544
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,31,0.19899733861287436
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,31,0.1243946651617686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,31,0.2108853260676066
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,31,0.09454400340716045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,31,0.11894399921099345
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,31,0.15610133608182272
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,31,0.14009599884351095
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,31,0.15982400377591452
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,31,0.12753599882125854
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,31,0.20560532808303833
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,31,0.1731520096460978
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,31,0.16058133045832315
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,31,0.09823466340700786
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,31,0.14962666233380637
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,31,0.12533332904179892
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,31,0.12330133716265361
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,31,0.15019733707110086
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,31,0.15657066305478415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,31,0.20721600453058878
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,31,0.15169599652290344
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,31,0.19568000237147012
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,31,0.15269866585731506
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,31,0.1456000010172526
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,31,0.1490506629149119
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,31,0.12321600317955017
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,31,0.13937600453694662
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,31,0.14986667037010193
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,31,0.15247467160224915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,31,0.1213759978612264
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,31,0.15238933761914572
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,31,0.16125333309173584
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,31,0.15262400110562643
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,31,0.15800000230471292
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,31,0.16230400403340658
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,31,0.1946773330370585
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,31,0.11508267124493916
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,31,0.19156267245610556
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,31,0.1607039968172709
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,31,0.194650669892629
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,31,0.09940266609191895
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,31,0.1359946628411611
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,31,0.1497760017712911
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,31,0.12247999509175618
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,31,0.15134400129318237
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,31,0.15621866782506308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,31,0.1458613375822703
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,31,0.17721066872278848
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,31,0.1646613379319509
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,31,0.16172800461451212
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,31,0.15405866503715515
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,31,0.1405333379904429
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,31,0.15439466635386148
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,31,0.16178666551907858
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,31,0.20059200127919516
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,31,0.1506186624368032
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,31,0.15711999932924905
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,31,0.1945599913597107
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,31,0.15768000483512878
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,31,0.1486240029335022
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,31,0.15631999572118124
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,31,0.20114666223526
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,31,0.15255467096964517
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,31,0.19695999224980673
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,31,0.1946559945742289
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,31,0.12012799580891927
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,31,0.16375999649365744
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,31,0.1972586711247762
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,31,0.15502400199572244
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,31,0.15901333093643188
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,31,0.12095466256141663
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,31,0.1976213256518046
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,31,0.3284426728884379
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,31,0.34779731432596844
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,31,0.19342400630315146
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,31,0.2104853391647339
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,31,0.15731733043988547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,31,0.19489065806070963
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,31,0.12210667133331299
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,31,0.20032000541687012
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,31,0.1493333379427592
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,31,0.19640000661214194
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,31,0.14696000019709268
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,31,0.188591996828715
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,31,0.1133013367652893
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,31,0.14657599727312723
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,31,0.19968533515930176
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,31,0.2021226684252421
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,31,0.33001599709192914
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,31,0.649616003036499
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,31,0.18427733580271402
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,31,0.38891200224558514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,31,0.13312533497810364
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,31,0.25090134143829346
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,31,0.16300266981124878
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,31,0.20295999447504678
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,31,0.15275733669598898
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,31,0.1567306617895762
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,63,0.09455466270446777
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,31,0.15896532932917276
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,31,0.33442668120066327
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,31,0.19937600692113241
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,63,0.18194133043289185
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,63,0.13868266344070435
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,31,0.1588373382886251
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,31,0.15424533685048422
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,63,0.18863999843597412
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,63,0.1166986624399821
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,63,0.1881493330001831
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,31,0.20853332678476968
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,63,0.15101866920789084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,63,0.175818661848704
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,63,0.139957328637441
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,63,0.14847999811172485
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,63,0.14825066924095154
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,63,0.14640532930692038
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,63,0.1431893308957418
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,63,0.194922665754954
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,63,0.15146133303642273
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,63,0.1980266571044922
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,63,0.19275200366973877
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,63,0.2675466736157735
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,63,0.1887893279393514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,63,0.14779200156529745
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,63,0.18659732739130655
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,63,0.14808533589045206
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,63,0.1515679955482483
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,63,0.1914400060971578
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,63,0.13800000150998434
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,63,0.14426666498184204
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,63,0.1449066698551178
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,63,0.18980266650517783
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,63,0.14921599626541138
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,63,0.14900267124176025
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,63,0.15347199638684592
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,63,0.17378133535385132
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,63,0.09390399853388469
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,63,0.12131200234095256
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,63,0.15126400192578635
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,63,0.15236799915631613
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,63,0.19182932376861572
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,63,0.1360106666882833
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,63,0.1898933251698812
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,63,0.1569493313630422
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,63,0.1760960022608439
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,63,0.1585706671079
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,63,0.15083199739456177
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,63,0.09308266639709473
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,63,0.19672532876332602
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,63,0.11936533451080322
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,63,0.19277334213256836
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,63,0.12268267075220744
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,63,0.19748266537984213
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,63,0.12544000148773193
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,63,0.14733866850535074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,63,0.1900213360786438
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,63,0.16369066635767618
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,63,0.15176000197728476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,63,0.15397866566975912
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,63,0.16395200292269388
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,63,0.15944000085194907
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,63,0.1885653336842855
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,63,0.15050666530927023
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,63,0.19076265891393027
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,63,0.10738666852315266
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,63,0.09738133351008098
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,63,0.16396266222000122
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,63,0.15405866503715515
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,63,0.15921599666277567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,63,0.10545066992441814
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,63,0.12138666709264119
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,63,0.19131733973821005
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,63,0.1511733333269755
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,63,0.12684266765912375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,63,0.20625066757202148
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,63,0.1980959971745809
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,63,0.1316480040550232
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,63,0.18319465716679892
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,63,0.13660266995429993
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,63,0.19364267587661743
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,63,0.12800000111262003
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,63,0.14727999766667685
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,63,0.15060266852378845
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,63,0.19308799505233765
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,63,0.09887466828028361
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,63,0.15014400084813437
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,63,0.2055520017941793
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,63,0.13731732964515686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,63,0.18985599279403687
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,63,0.13276267051696777
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,63,0.14903466900189719
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,63,0.17042134205500284
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,63,0.1553386648495992
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,63,0.19724800189336142
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,63,0.12483732899030049
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,63,0.17365866899490356
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,63,0.15027733643849692
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,63,0.17659733692804971
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,63,0.11594133575757344
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,63,0.15574399630228677
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,63,0.1420693298180898
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,63,0.121888001759847
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,63,0.21414933602015176
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,63,0.14891200264294943
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,63,0.17774933576583862
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,63,0.14243732889493307
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,63,0.13397333025932312
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,63,0.1618133286635081
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,63,0.12268799543380737
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,63,0.15503999590873718
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,63,0.15944533546765646
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,63,0.15251200397809347
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,63,0.1941173275311788
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,63,0.1364959975083669
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,63,0.18739734093348184
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,63,0.13691733280817667
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,63,0.13775466879208884
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,63,0.17446933190027872
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,63,0.12574932972590128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,63,0.09889599680900574
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,63,0.12424000104268391
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,63,0.12371733784675598
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,63,0.11875733733177185
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,63,0.1774239937464396
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,63,0.12387733658154805
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,63,0.14812800288200378
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,63,0.11400533715883891
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,63,0.15658666690190634
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,63,0.15018133322397867
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,63,0.15937599539756775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,63,0.1176479955514272
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,63,0.20066134134928384
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,63,0.19462400674819946
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,63,0.16756266355514526
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,63,0.15634133418401083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,63,0.18714666366577148
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,63,0.15261333187421164
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,63,0.15384533007939658
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,63,0.12197333574295044
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,63,0.1506239970525106
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,63,0.14332266648610434
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,63,0.15063466628392538
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,63,0.19492799043655396
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,63,0.15169599652290344
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,63,0.17829332749048868
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,63,0.15172800421714783
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,63,0.1551466683546702
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,63,0.11682666341463725
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,63,0.3282080094019572
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,63,0.3472213347752889
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,63,0.19803732633590698
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,63,0.21262933810551962
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,63,0.14627733826637268
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,63,0.20674665768941244
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,63,0.15722666184107462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,63,0.14787200093269348
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,63,0.13870933651924133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,63,0.15547200043996176
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,63,0.12484266360600789
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,63,0.20089600483576456
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,63,0.13899200161298117
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,63,0.19177067279815674
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,63,0.15602133671442667
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,63,0.149509330590566
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,63,0.3239413301150004
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,63,0.19210666418075562
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,63,0.6487413247426351
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,63,0.1720586617787679
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,63,0.38839999834696454
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,63,0.1586133340994517
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,63,0.24290666977564493
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,63,0.16165866454442343
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,63,0.14305599530537924
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,63,0.20175466934839884
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,127,0.11017066240310669
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,63,0.15878933668136597
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,63,0.11852266391118367
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,63,0.12238933642705281
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,127,0.15972800056139627
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,127,0.14306132992108664
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,63,0.1844373345375061
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,127,0.1869279940923055
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,127,0.10705600182215373
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,63,0.19738666216532388
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,63,0.15546666582425436
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,127,0.19782400131225586
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,127,0.15666666626930237
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,127,0.1453333298365275
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,127,0.13806399703025818
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,127,0.18239466349283853
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,127,0.10659733414649963
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,127,0.15405333042144775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,127,0.1206773320833842
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,127,0.19242666165033975
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,127,0.14870400230089822
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,127,0.13760000467300415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,127,0.0990826686223348
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,127,0.12798933188120523
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,127,0.10925867160161336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,127,0.1550879975159963
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,127,0.14822399616241455
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,127,0.14989866813023886
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,127,0.1509866714477539
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,127,0.14063466588656107
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,127,0.13723733027776083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,127,0.18621333440144858
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,127,0.13581333557764688
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,127,0.19960532585779825
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,127,0.12114666899045308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,127,0.19070400794347128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,127,0.15016000469525656
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,127,0.14128533005714417
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,127,0.1973653237024943
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,127,0.09744000434875488
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,127,0.11885333061218262
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,127,0.13884799679120383
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,127,0.20121600230534872
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,127,0.12944533427556357
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,127,0.15056000153223673
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,127,0.16487466295560202
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,127,0.13809600472450256
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,127,0.15015467007954916
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,127,0.151936004559199
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,127,0.190938671429952
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,127,0.14578133821487427
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,127,0.17546667655309042
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,127,0.15837333599726358
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,127,0.1883466641108195
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,127,0.19133333365122476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,127,0.1202239990234375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,127,0.15017066399256387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,127,0.1532906691233317
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,127,0.13337066769599915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,127,0.2046239972114563
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,127,0.11487467090288798
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,127,0.197434663772583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,127,0.15902400016784668
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,127,0.10716799894968669
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,127,0.2002826730410258
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,127,0.15474133690198263
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,127,0.139984001715978
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,127,0.13552533586819968
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,127,0.15613333384195963
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,127,0.20492800076802573
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,127,0.0958079993724823
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,127,0.12692800164222717
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,127,0.15249066551526388
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,127,0.17198399702707926
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,127,0.140255997578303
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,127,0.15379732847213745
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,127,0.15852800011634827
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,127,0.139082670211792
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,127,0.20699199040730795
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,127,0.15320000052452087
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,127,0.1618666648864746
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,127,0.20104533433914185
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,127,0.15033066272735596
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,127,0.20154666900634766
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,127,0.16173866391181946
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,127,0.1777226726214091
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,127,0.10874133308728536
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,127,0.15226133664449057
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,127,0.14915200074513754
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,127,0.2095200022061666
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,127,0.14934933185577393
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,127,0.20046399037043253
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,127,0.12928533554077148
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,127,0.16928533713022867
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,127,0.12891200184822083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,127,0.18302400906880698
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,127,0.17565866311391196
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,127,0.15239999691645303
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,127,0.15380266308784485
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,127,0.135535995165507
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,127,0.19670933485031128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,127,0.19746132691701254
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,127,0.13181333740552267
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,127,0.10069866975148518
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,127,0.1237546702226003
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,127,0.14242133498191833
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,127,0.14403200149536133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,127,0.17492266496022543
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,127,0.1493280033270518
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,127,0.15892799695332846
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,127,0.12943466504414877
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,127,0.1997386614481608
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,127,0.14086400469144186
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,127,0.17698132991790771
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,127,0.14218133687973022
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,127,0.14287466804186502
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,127,0.14884799718856812
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,127,0.17905600865681967
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,127,0.15709867080052695
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,127,0.10441600282986958
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,127,0.13121066490809122
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,127,0.14396799604098
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,127,0.1406613290309906
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,127,0.1744640072186788
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,127,0.1479626695315043
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,127,0.18147200345993042
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,127,0.15890133380889893
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,127,0.15602133671442667
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,127,0.2025279998779297
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,127,0.17882666985193887
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,127,0.15829333662986755
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,127,0.18428800503412882
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,127,0.1388106644153595
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,127,0.15781866510709128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,127,0.16774932543436685
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,127,0.17099199692408243
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,127,0.20170666774113974
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,127,0.18756266434987387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,127,0.2055786649386088
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,127,0.12785599629084268
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,127,0.1835040052731832
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,127,0.15100799997647604
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,127,0.14422399799029031
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,127,0.13545599579811096
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,127,0.1973759929339091
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,127,0.14632532993952432
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,127,0.1546453336874644
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,127,0.1606559952100118
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,127,0.19547200202941895
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,127,0.15982932845751444
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,127,0.34392531712849933
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,127,0.3593493302663167
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,127,0.20961600542068481
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,127,0.2181439995765686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,127,0.16359466314315796
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,127,0.16538666685422262
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,127,0.1590826710065206
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,127,0.1876586675643921
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,127,0.14704533418019614
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,127,0.19935466845830283
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,127,0.12415466705958049
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,127,0.2043786644935608
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,127,0.13499200344085693
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,127,0.17749333381652832
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,127,0.12518933415412903
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,127,0.18210667371749878
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,127,0.3519573211669922
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,127,0.6741546789805094
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,127,0.21553067366282144
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,127,0.4060800075531006
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,127,0.13873066504796347
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,127,0.26769065856933594
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,127,0.17199466625849405
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,127,0.20687466859817505
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,127,0.16357333461443582
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,127,0.17417067289352417
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,255,0.12358933687210083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,127,0.13212800025939941
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,127,0.2137653430302938
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,127,0.16958399613698324
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,127,0.1613653302192688
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,255,0.11707199613253276
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,255,0.15038933356602988
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,255,0.15846932927767435
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,127,0.15828800201416016
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,255,0.15980266531308493
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,255,0.19539199272791544
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,127,0.2047626574834188
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,255,0.14596266547838846
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,255,0.18060266971588135
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,255,0.14468800028165182
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,255,0.15442132949829102
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,255,0.14849066734313965
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,255,0.20733867088953653
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,255,0.12547733386357626
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,255,0.2453119953473409
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,255,0.2079040010770162
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,255,0.18901866674423218
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,255,0.14798399806022644
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,255,0.18483734130859375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,255,0.09385599692662557
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,255,0.14896532893180847
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,255,0.15255467096964517
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,255,0.15135467052459717
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,255,0.15878400206565857
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,255,0.14658666650454202
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,255,0.143477330605189
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,255,0.2195146679878235
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,255,0.14607466260592142
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,255,0.15364799896876016
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,255,0.12737600008646646
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,255,0.15351466337839761
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,255,0.1604159971078237
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,255,0.12692266702651978
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,255,0.14470400412877402
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,255,0.0958026647567749
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,255,0.12026133139928182
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,255,0.14460266629854837
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,255,0.19526932636896768
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,255,0.18844266732533774
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,255,0.1837973395983378
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,255,0.15095466375350952
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,255,0.16024000446001688
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,255,0.1976906657218933
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,255,0.14938132961591086
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,255,0.14336533347765604
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,255,0.12452266613642375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,255,0.15122133493423462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,255,0.19108267625172934
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,255,0.14482667048772177
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,255,0.1946773330370585
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,255,0.10326400399208069
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,255,0.14735466241836548
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,255,0.17258665959040323
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,255,0.12408533692359924
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,255,0.11726933717727661
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,255,0.15149866541226706
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,255,0.15647466977437338
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,255,0.1498026649157206
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,255,0.19338132937749228
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,255,0.15028799573580423
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,255,0.20777066548665366
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,255,0.17537067333857217
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,255,0.19147199392318726
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,255,0.12027200063069661
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,255,0.2034719983736674
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,255,0.1237600048383077
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,255,0.09731200337409973
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,255,0.15064533551534018
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,255,0.20433066288630167
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,255,0.13793067137400308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,255,0.15879467129707336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,255,0.15195199847221375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,255,0.21220799287160239
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,255,0.15588266650835672
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,255,0.19524266322453818
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,255,0.1569653352101644
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,255,0.17193599541982016
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,255,0.12242666880289714
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,255,0.19193067153294882
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,255,0.15971199671427408
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,255,0.19994133710861206
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,255,0.10103467106819153
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,255,0.15212266643842062
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,255,0.15622933705647787
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,255,0.12854400277137756
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,255,0.1240053375562032
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,255,0.19338132937749228
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,255,0.17649600903193155
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,255,0.15849066774050394
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,255,0.2095306714375814
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,255,0.15215999881426492
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,255,0.19276267290115356
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,255,0.15994667013486227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,255,0.1002453366915385
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,255,0.11751466989517212
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,255,0.1952213247617086
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,255,0.1523360013961792
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,255,0.20154666900634766
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,255,0.14634133378664652
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,255,0.12362666924794515
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,255,0.14751999576886496
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,255,0.19719467560450235
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,255,0.11776000261306763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,255,0.15971733132998148
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,255,0.18633600076039633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,255,0.15566399693489075
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,255,0.11820266644159953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,255,0.1565013329188029
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,255,0.17925333976745605
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,255,0.12276800473531087
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,255,0.19753599166870117
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,255,0.15493333339691162
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,255,0.19431465864181519
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,255,0.20007999738057455
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,255,0.18574400742848715
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,255,0.10601600011189778
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,255,0.12959999839464822
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,255,0.158160001039505
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,255,0.14916800459225973
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,255,0.12930666406949362
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,255,0.20057600736618042
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,255,0.12274666627248128
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,255,0.18929066260655722
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,255,0.14937067031860352
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,255,0.1961173415184021
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,255,0.1578879952430725
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,255,0.19512534141540527
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,255,0.12967466314633688
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,255,0.19708265860875449
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,255,0.18526933590571085
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,255,0.2141759991645813
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,255,0.15253333250681558
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,255,0.19797333081563315
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,255,0.15802133083343506
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,255,0.19400533040364584
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,255,0.15030933419863382
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,255,0.13290133078893027
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,255,0.15648000439008078
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,255,0.19223999977111816
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,255,0.11804266770680745
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,255,0.17788267135620117
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,255,0.1978506644566854
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,255,0.19035732746124268
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,255,0.11909866333007812
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,255,0.16516799728075662
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,255,0.3749226729075114
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,255,0.38148800532023114
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,255,0.23254932959874472
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,255,0.24278932809829712
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,255,0.17729065815607706
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,255,0.17431465784708658
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,255,0.15762133399645487
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,255,0.2054133415222168
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,255,0.16371200482050577
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,255,0.19588265816370645
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,255,0.12690666317939758
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,255,0.18092799186706543
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,255,0.15693333745002747
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,255,0.15827733278274536
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,255,0.15618667006492615
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,255,0.20447999238967896
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,255,0.40585601329803467
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,255,0.7173866430918375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,255,0.25378666321436566
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,255,0.4499359925587972
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,255,0.20723199844360352
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,255,0.31961599985758465
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,255,0.17947733402252197
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,255,0.2500693400700887
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,255,0.17261866728464761
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,255,0.23105067014694214
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,255,0.15918399890263876
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,255,0.1713013251622518
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,255,0.21255467335383096
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,255,0.21330134073893228
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,511,0.1532426675160726
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,511,0.09983467062314351
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,255,0.1658399999141693
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,511,0.15362133582433066
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,511,0.12166933218638103
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,511,0.1839253306388855
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,255,0.22258667151133218
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,511,0.15897599856058756
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,511,0.1535040040810903
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,511,0.16699200868606567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,511,0.19988266626993814
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,511,0.11665599544843037
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,511,0.1523360013961792
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,511,0.11693333586057027
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,511,0.18616533279418945
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,511,0.15614933768908182
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,511,0.1262399951616923
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,511,0.16148799657821655
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,511,0.09687999884287517
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,511,0.12446932991345723
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,511,0.12436266740163167
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,511,0.15169066190719604
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,511,0.15445866187413534
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,511,0.13995200395584106
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,511,0.19998933871587118
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,511,0.12029332915941875
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,511,0.16291733582814535
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,511,0.15370666980743408
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,511,0.12544000148773193
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,511,0.19825067122777304
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,511,0.11614933609962463
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,511,0.173418660958608
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,511,0.15442132949829102
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,511,0.1954560081164042
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,511,0.10239467024803162
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,511,0.12090667088826497
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,511,0.1209333340326945
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,511,0.19598400592803955
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,511,0.11544533570607503
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,511,0.12666666507720947
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,511,0.1771626671155294
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,511,0.15692800283432007
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,511,0.1476906637350718
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,511,0.1558986703554789
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,511,0.15155200163523355
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,511,0.19329599539438883
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,511,0.11493866642316182
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,511,0.15481066703796387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,511,0.11268267035484314
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,511,0.20030933618545532
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,511,0.09703466296195984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,511,0.13327999909718832
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,511,0.2015413244565328
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,511,0.11542399724324544
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,511,0.19774399201075235
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,511,0.11617599924405415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,511,0.15802666544914246
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,511,0.1534293293952942
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,511,0.11197866996129353
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,511,0.11434666315714519
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,511,0.14381866653760275
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,511,0.16382400194803873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,511,0.15637333194414774
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,511,0.1969226598739624
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,511,0.14806399742762247
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,511,0.1934453248977661
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,511,0.09627733627955119
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,511,0.12774399916330972
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,511,0.09706667065620422
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,511,0.16668800512949625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,511,0.15689067045847574
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,511,0.1337493360042572
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,511,0.1625279982884725
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,511,0.1145919958750407
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,511,0.1976906657218933
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,511,0.11364266276359558
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,511,0.1895680030186971
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,511,0.1179200013478597
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,511,0.15735999743143717
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,511,0.19115199645360312
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,511,0.1511146624883016
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,511,0.1360266705354055
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,511,0.14828800161679587
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,511,0.09518933296203613
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,511,0.12298666437466939
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,511,0.14967466394106546
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,511,0.11547199885050456
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,511,0.19263466199239096
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,511,0.1470186710357666
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,511,0.16676799456278482
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,511,0.20198400815327963
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,511,0.11758400003115337
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,511,0.20249066750208536
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,511,0.11729600032170613
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,511,0.19263466199239096
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,511,0.1583199997742971
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,511,0.14683199922243753
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,511,0.17062399784723917
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,511,0.10211199522018433
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,511,0.19029333194096884
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,511,0.127893328666687
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,511,0.1588053305943807
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,511,0.19404800732930502
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,511,0.1586079994837443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,511,0.13125866651535034
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,511,0.16139733791351318
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,511,0.14459199706713358
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,511,0.16024000446001688
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,511,0.14917866388956705
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,511,0.1959999998410543
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,511,0.14877866705258688
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,511,0.20835200945536295
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,511,0.19413334131240845
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,511,0.12709333499272665
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,511,0.16240533192952475
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,511,0.45467201868693036
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,511,0.1394773324330648
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,511,0.12177600463231404
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,511,0.15109866857528687
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,511,0.1225333313147227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,511,0.2034613291422526
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,511,0.2035520076751709
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,511,0.12261866529782613
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,511,0.14818132917086282
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,511,0.11833066741625468
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,511,0.1525706648826599
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,511,0.11483200391133626
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,511,0.15759999553362528
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,511,0.15717867016792297
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,511,0.2049013376235962
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,511,0.21624000867207846
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,511,0.23748266696929932
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,511,0.14825600385665894
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,511,0.19535466035207114
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,511,0.12854400277137756
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,511,0.16105600198109946
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,511,0.16487466295560202
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,511,0.16699733336766562
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,511,0.12599999705950418
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,511,0.18948266903559366
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,511,0.12077333529790242
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,511,0.19033600886662802
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,511,0.12569600343704224
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,511,0.1600266695022583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,511,0.1611199975013733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,511,0.17001599073410034
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,511,0.5189386606216431
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,511,0.42979200681050617
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,511,0.2870453397432963
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,511,0.22934399048487344
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,511,0.29179199536641437
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,511,0.22595200935999551
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,511,0.20256533225377402
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,511,0.21688532829284668
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,511,0.1819093426068624
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,511,0.199562668800354
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,511,0.17459734280904135
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,511,0.1808799902598063
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,511,0.17196265856424967
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,511,0.17882666985193887
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,511,0.21740265687306723
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,511,0.5257813135782877
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,511,0.7986559867858887
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,511,0.3100480039914449
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,511,0.5482186476389567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,511,0.4129653374354045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,511,0.36208001772562665
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,511,0.2840906580289205
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,511,0.1734293301900228
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,511,0.3475840091705322
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,511,0.27165865898132324
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,511,0.3237066666285197
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,511,0.2598560055096944
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,1023,0.12041067083676656
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,511,0.3162986636161804
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,1023,0.15465066830317178
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,511,0.2592800060908
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,511,0.30825599034627277
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,1023,0.116047998269399
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,1023,0.15064533551534018
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,1023,0.1472640037536621
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,1023,0.19166932503382364
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,1023,0.14890133341153464
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,1023,0.20601065953572592
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,1023,0.1179039975007375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,511,0.25831466913223267
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,1023,0.20173333088556925
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,1023,0.14812800288200378
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,511,0.3083999951680501
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,1023,0.1181813379128774
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,1023,0.19396267334620157
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,1023,0.14942933122316995
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,1023,0.11557333668073018
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,1023,0.20860799153645834
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,1023,0.19260799884796143
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,1023,0.12924800316492716
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,1023,0.12358933687210083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,1023,0.15788267056147257
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,1023,0.14851733048756918
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,1023,0.157151997089386
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,1023,0.11594667037328084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,1023,0.15173866351445517
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,1023,0.1639840006828308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,1023,0.20082666476567587
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,1023,0.12088533242543538
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,1023,0.15265599886576334
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,1023,0.15252799789110819
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,1023,0.147599995136261
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,1023,0.1977120041847229
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,1023,0.15067199865976968
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,1023,0.20058133204778036
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,1023,0.10043199857076009
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,1023,0.15866133570671082
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,1023,0.13190399607022604
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,1023,0.11868266264597575
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,1023,0.18528000513712564
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,1023,0.150736004114151
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,1023,0.15914666652679443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,1023,0.1586240033308665
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,1023,0.19765333334604898
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,1023,0.11843199531237285
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,1023,0.19672532876332602
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,1023,0.11494400103886922
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,1023,0.18780267238616943
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,1023,0.12222933769226074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,1023,0.1188213328520457
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,1023,0.1527839998404185
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,1023,0.10022399822870891
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,1023,0.1246666709582011
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,1023,0.11700800061225891
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,1023,0.11757866541544597
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,1023,0.12477333347002666
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,1023,0.18611733118693033
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,1023,0.18289599816004434
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,1023,0.12017599741617839
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,1023,0.1569973329703013
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,1023,0.18498667081197104
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,1023,0.15077867110570273
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,1023,0.11563733220100403
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,1023,0.20007999738057455
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,1023,0.15174399813016257
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,1023,0.15036267042160034
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,1023,0.19948800404866537
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,1023,0.0972053309281667
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,1023,0.1239306628704071
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,1023,0.1613706648349762
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,1023,0.19615999857584634
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,1023,0.10380267103513081
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,1023,0.15270933508872986
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,1023,0.15252799789110819
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,1023,0.153546671072642
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,1023,0.15756799777348837
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,1023,0.11318932970364888
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,1023,0.20639467239379883
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,1023,0.15132799744606018
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,1023,0.1548906664053599
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,1023,0.15199466546376547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,1023,0.19241599241892496
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,1023,0.10076266527175903
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,1023,0.12568533420562744
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,1023,0.09820800026257832
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,1023,0.20390933752059937
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,1023,0.12342400352160136
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,1023,0.15930666526158652
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,1023,0.15537066260973612
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,1023,0.19010667006174722
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,1023,0.12825600306193033
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,1023,0.15638400117556253
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,1023,0.11628266175587972
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,1023,0.2097439964612325
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,1023,0.11801066994667053
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,1023,0.15639999508857727
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,1023,0.15416533748308817
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,1023,0.19005332390467325
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,1023,0.0999679962793986
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,1023,0.12482133507728577
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,1023,0.11595199505488078
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,1023,0.2067199945449829
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,1023,0.11921067039171855
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,1023,0.15686933199564615
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,1023,0.21313599745432535
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,1023,0.15106667081514993
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,1023,0.19418134291966757
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,1023,0.12787733475367227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,1023,0.19723200798034668
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,1023,0.1534293293952942
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,1023,0.11573333541552226
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,1023,0.1557919979095459
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,1023,0.15730133652687073
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,1023,0.12762666742006937
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,1023,0.12262399991353352
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,1023,0.203658660252889
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,1023,0.15096533298492432
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,1023,0.16364799936612448
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,1023,0.14867200454076132
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,1023,0.16806934277216592
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,1023,0.12173866232236226
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,1023,0.15294933319091797
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,1023,0.12462932864824931
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,1023,0.19714667399724325
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,1023,0.125791996717453
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,1023,0.19233600298563638
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,1023,0.1239946683247884
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,1023,0.15517866611480713
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,1023,0.12180266777674358
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,1023,0.15636266271273294
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,1023,0.27190399169921875
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,1023,0.28339733680089313
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,1023,0.1969119906425476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,1023,0.1546986699104309
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,1023,0.210533340771993
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,1023,0.17514665921529135
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,1023,0.20276800791422525
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,1023,0.16087999939918518
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,1023,0.17337065935134888
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,1023,0.2057173252105713
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,1023,0.15065067013104758
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,1023,0.17533334096272787
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,1023,0.14899200201034546
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,1023,0.20908266305923462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,1023,0.38095998764038086
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,1023,0.16035733620325723
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,1023,0.16478932897249857
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,1023,0.5473866860071818
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,1023,0.5202186504999796
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,1023,0.3030719955762227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,1023,0.3857920169830322
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,1023,0.3200266758600871
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,1023,0.3264639973640442
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,1023,0.28199466069539386
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,1023,0.29341334104537964
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,1023,0.2779680093129476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,1023,0.2828480005264282
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,1023,0.2709280053774516
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,1023,0.2619733413060506
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,1023,0.2742453416188558
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,1023,0.26543466250101727
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,1023,0.2778720060984294
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,1023,0.7425599892934164
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,1023,0.9789653619130453
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,1023,0.5027893384297689
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,1023,0.5544586579004923
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,1023,0.7364373207092285
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,1023,0.5941333373387655
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,1023,0.47916801770528156
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,1023,0.5396746794382731
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,1023,0.45902931690216064
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,1023,0.5116159915924072
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,1023,0.4524960120519002
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,1023,0.49484264850616455
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,1023,0.44861332575480145
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,1023,0.48842132091522217
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,2047,0.09665066997210185
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,1023,0.44570668538411456
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,2047,0.12242666880289714
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,1023,0.4952319860458374
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,2047,0.1284160017967224
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,2047,0.1546346644560496
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,2047,0.1546026666959127
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,2047,0.19075200955073038
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,2047,0.11980799833933513
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,2047,0.20193066199620566
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,2047,0.1178559958934784
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,2047,0.18953599532445273
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,2047,0.13482133547465006
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,2047,0.15504533052444458
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,2047,0.12317867080370586
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,2047,0.1551413337389628
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,2047,0.19307732582092285
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,2047,0.11680000027020772
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,2047,0.19814932346343994
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,2047,0.09494400024414062
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,2047,0.130949338277181
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,2047,0.15204266707102457
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,2047,0.14974932869275412
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,2047,0.19758933782577515
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,2047,0.15243200461069742
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,2047,0.20331199963887533
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,2047,0.19756799936294556
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,2047,0.1584213376045227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,2047,0.15236799915631613
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,2047,0.13377066453297934
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,2047,0.15376533071200052
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,2047,0.1541706621646881
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,2047,0.15370666980743408
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,2047,0.15241600076357523
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,2047,0.10425600409507751
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,2047,0.1544319987297058
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,2047,0.13125333189964294
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,2047,0.12124266227086385
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,2047,0.1520853340625763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,2047,0.1678559978802999
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,2047,0.1237386663754781
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,2047,0.19144533077875772
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,2047,0.15127467115720114
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,2047,0.162063995997111
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,2047,0.153738667567571
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,2047,0.15438933173815408
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,2047,0.118559996287028
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,2047,0.1529706617196401
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,2047,0.16941867272059122
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,2047,0.12180800239245097
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,2047,0.1306773324807485
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,2047,0.19932266076405844
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,2047,0.10405332843462627
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,2047,0.12713600198427835
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,2047,0.1495199998219808
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,2047,0.1223413348197937
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,2047,0.1514079968134562
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,2047,0.15602133671442667
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,2047,0.13153599699338278
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,2047,0.19192000230153403
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,2047,0.15595199664433798
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,2047,0.12267733613650005
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,2047,0.20068800449371338
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,2047,0.15432000160217285
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,2047,0.15264532963434854
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,2047,0.14759467045466104
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,2047,0.11703999837239583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,2047,0.1488746702671051
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,2047,0.10688533385594685
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,2047,0.18841065963109335
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,2047,0.13151466846466064
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,2047,0.19771732886632284
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,2047,0.15068800250689188
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,2047,0.15825066963831583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,2047,0.11659733454386394
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,2047,0.15335466464360556
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,2047,0.12877333164215088
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,2047,0.18938666582107544
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,2047,0.1218346655368805
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,2047,0.19827200969060263
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,2047,0.14963199694951376
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,2047,0.15453867117563883
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,2047,0.09985599915186565
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,2047,0.1251040001710256
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,2047,0.1313759982585907
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,2047,0.1917333404223124
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,2047,0.12659200032552084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,2047,0.20402665932973227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,2047,0.1551093359788259
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,2047,0.15553067127863565
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,2047,0.1223520040512085
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,2047,0.15677866339683533
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,2047,0.12933866182963052
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,2047,0.19613866011301676
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,2047,0.1267519990603129
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,2047,0.20305599768956503
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,2047,0.11755733688672383
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,2047,0.15682666500409445
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,2047,0.11683199803034465
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,2047,0.13322133819262186
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,2047,0.13477866848309836
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,2047,0.18967467546463013
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,2047,0.12928533554077148
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,2047,0.20165866613388062
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,2047,0.12417067090670268
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,2047,0.15546133120854697
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,2047,0.12613333264986673
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,2047,0.15967999895413718
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,2047,0.15762666861216226
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,2047,0.1304746667544047
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,2047,0.19169066349665323
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,2047,0.13077333569526672
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,2047,0.20046399037043253
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,2047,0.16296000281969705
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,2047,0.12481600046157837
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,2047,0.20315200090408325
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,2047,0.21093867222468057
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,2047,0.16174399852752686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,2047,0.18487467368443808
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,2047,0.174453337987264
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,2047,0.16563733418782553
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,2047,0.19819732507069907
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,2047,0.1432266632715861
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,2047,0.16471466422080994
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,2047,0.14313600460688272
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,2047,0.1675306757291158
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,2047,0.14472533265749613
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,2047,0.19885333379109701
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,2047,0.16406400005022684
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,2047,0.1989013353983561
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,2047,0.3794506788253784
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,2047,0.25996265808741253
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,2047,0.2693866689999898
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,2047,0.3792106707890828
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,2047,0.27059733867645264
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,2047,0.28989332914352417
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,2047,0.314303994178772
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,2047,0.26844799518585205
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,2047,0.2885333299636841
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,2047,0.24633600314458212
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,2047,0.2535039981206258
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,2047,0.24454933404922485
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,2047,0.26266666253407794
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,2047,0.2505013346672058
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,2047,0.263866662979126
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,2047,0.25872000058492023
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,2047,0.703711986541748
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,2047,0.7578186988830566
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,2047,0.5734666585922241
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,2047,0.5787306626637777
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,2047,0.4780160188674927
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,2047,0.515999992688497
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,2047,0.48042134443918866
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,2047,0.5043786764144897
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,2047,0.4702879985173543
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,2047,0.48334399859110516
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,2047,0.46616001923878986
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,2047,0.46032532056172687
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,2047,1.1650826930999756
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,2047,0.46857066949208576
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,2047,0.4508479833602905
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,2047,1.1103306611378987
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,2047,0.4470826784769694
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,2047,0.4554506540298462
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,2047,1.3483093579610188
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,2047,0.9338933626810709
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,2047,0.8879626592000326
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,2047,0.9766186873118082
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,2047,0.8579146862030029
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,2047,0.9161653518676758
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,2047,0.8414080142974854
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,2047,0.8705173333485922
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,2047,0.8734133243560791
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,2047,0.8309493064880371
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,2047,0.873248020807902
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,2047,0.8248693148295084
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,2047,0.8196960290273031
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,4095,0.09852799773216248
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,2047,0.871503988901774
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,4095,0.12181867162386577
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,4095,0.12265599767367046
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,4095,0.11921599507331848
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,4095,0.11986666917800903
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,4095,0.1602186659971873
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,4095,0.15033599734306335
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,4095,0.19776533047358194
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,4095,0.19353065888086954
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,4095,0.15824000040690103
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,4095,0.124208003282547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,4095,0.09777599573135376
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,4095,0.18996800978978476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,4095,0.12180266777674358
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,4095,0.158053328593572
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,4095,0.14713066816329956
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,4095,0.19676266113917032
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,4095,0.12723732988039652
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,4095,0.12110933661460876
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,4095,0.19569599628448486
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,4095,0.1197813351949056
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,4095,0.1458453337351481
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,4095,0.15406933426856995
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,4095,0.13541332880655924
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,4095,0.1922666629155477
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,4095,0.1602773368358612
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,4095,0.2049973408381144
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,4095,0.19550400972366333
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,4095,0.1207360029220581
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,4095,0.1898933251698812
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,4095,0.11771733562151591
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,4095,0.1609440048535665
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,4095,0.10149866342544556
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,4095,0.12820800145467123
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,4095,0.15778133273124695
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,4095,0.19400533040364584
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,4095,0.11958400408426921
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,4095,0.19907732804616293
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,4095,0.12247999509175618
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,4095,0.16204266746838888
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,4095,0.15160000324249268
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,4095,0.19714667399724325
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,4095,0.15806933244069418
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,4095,0.19518399238586426
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,4095,0.12427733341852824
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,4095,0.19595199823379517
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,4095,0.11740799744923909
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,4095,0.15966932972272238
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,4095,0.09776000181833903
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,4095,0.12667733430862427
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,4095,0.15834133823712668
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,4095,0.19458667437235513
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,4095,0.11683733264605205
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,4095,0.19529600938161215
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,4095,0.11681600411732991
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,4095,0.1604693333307902
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,4095,0.15169066190719604
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,4095,0.1627840002377828
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,4095,0.19813867410024008
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,4095,0.15598932902018228
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,4095,0.1948266625404358
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,4095,0.15607999761899313
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,4095,0.19767999649047852
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,4095,0.11779733498891194
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,4095,0.19170665740966797
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,4095,0.1179093321164449
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,4095,0.10221333305040996
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,4095,0.13105066617329916
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,4095,0.15344533324241638
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,4095,0.20282665888468424
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,4095,0.15315199891726175
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,4095,0.1962133248647054
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,4095,0.12590400377909342
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,4095,0.15237866838773093
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,4095,0.15019200245539346
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,4095,0.16286399960517883
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,4095,0.15135467052459717
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,4095,0.170522669951121
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,4095,0.19659199317296347
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,4095,0.15454399585723877
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,4095,0.19352533419926962
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,4095,0.12325333555539449
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,4095,0.1330826679865519
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,4095,0.1571466624736786
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,4095,0.15507733821868896
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,4095,0.16474133729934692
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,4095,0.15838932991027832
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,4095,0.15414933363596597
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,4095,0.19848533471425375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,4095,0.15269333124160767
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,4095,0.1550826629002889
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,4095,0.12617599964141846
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,4095,0.19483200709025064
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,4095,0.19612266620000204
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,4095,0.16075733304023743
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,4095,0.1973759929339091
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,4095,0.17256534099578857
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,4095,0.18493866920471191
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,4095,0.1662986675898234
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,4095,0.1657386620839437
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,4095,0.1620213290055593
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,4095,0.15318933129310608
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,4095,0.1660480002562205
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,4095,0.2025973399480184
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,4095,0.16390400131543478
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,4095,0.16961065928141275
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,4095,0.20376000801722208
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,4095,0.15178666512171426
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,4095,0.16131732861200967
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,4095,0.16961600383122763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,4095,0.19957866271336874
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,4095,0.2003999948501587
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,4095,0.3097760081291199
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,4095,0.3115466634432475
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,4095,0.26232000192006427
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,4095,0.24204800526301065
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,4095,0.27636800209681195
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,4095,0.2479040026664734
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,4095,0.2640320062637329
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,4095,0.24436799685160318
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,4095,0.2645333409309387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,4095,0.25812800725301105
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,4095,0.23975465695063272
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,4095,0.25756800174713135
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,4095,0.23536533117294312
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,4095,0.2534613410631816
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,4095,0.23507734139760336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,4095,0.255018671353658
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,4095,0.5855093399683634
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,4095,0.569381316502889
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,4095,0.4888853232065837
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,4095,0.5055199861526489
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,4095,0.45850133895874023
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,4095,0.4747519890467326
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,4095,0.446885347366333
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,4095,0.4628586769104004
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,4095,0.4423146645228068
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,4095,0.4545706510543823
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,4095,0.4410613377888997
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,4095,0.455349326133728
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,4095,0.43300799528757733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,4095,0.952064037322998
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,4095,0.45000000794728595
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,4095,0.4371680021286011
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,4095,0.4540160099665324
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,4095,0.8666826883951823
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,4095,1.175317366917928
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,4095,1.0754773616790771
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,4095,0.9482506910959879
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,4095,0.8906346956888834
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,4095,0.8921813170115153
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,4095,0.861525297164917
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,4095,0.8511626720428467
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,4095,0.8547840118408203
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,4095,0.8352800210316976
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,4095,0.8391146659851074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,4095,0.8325386842091879
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,4095,0.8262399832407633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,4095,0.828010638554891
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,4095,0.838752031326294
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,4095,1.9930133819580078
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,4095,2.1035307248433432
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,4095,1.694757302602132
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,4095,1.8590240478515625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,4095,1.6394507090250652
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,4095,1.701690673828125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,4095,1.6095093091328938
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,4095,1.647546609242757
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,4095,1.59115203221639
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,4095,1.6372106870015461
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,4095,1.5828159650166829
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,4095,1.6137653986612956
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,4095,1.5777920087178547
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,4095,1.605445384979248
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,4095,1.5749279657999675
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,8191,0.10969600081443787
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,4095,1.6053706804911296
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,8191,0.11935999989509583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,8191,0.15125866731007895
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,8191,0.160453329483668
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,8191,0.1486293375492096
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,8191,0.19250667095184326
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,8191,0.15010666847229004
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,8191,0.198362668355306
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,8191,0.14969066778818765
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,8191,0.1890559991200765
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,8191,0.15341867009798685
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,8191,0.15938666462898254
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,8191,0.14936000108718872
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,8191,0.1969546675682068
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,8191,0.1478613317012787
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,8191,0.19871999820073447
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,8191,0.09726933638254802
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,8191,0.11969066659609477
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,8191,0.12099732955296834
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,8191,0.16005866726239523
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,8191,0.15041599671045938
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,8191,0.19354132811228433
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,8191,0.14903466900189719
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,8191,0.20188266038894653
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,8191,0.1524799962838491
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,8191,0.22659200429916382
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,8191,0.12340266505877177
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,8191,0.18907199303309122
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,8191,0.122597336769104
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,8191,0.1558986703554789
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,8191,0.1966773271560669
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,8191,0.14883200327555338
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,8191,0.16345066825548807
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,8191,0.10150933265686035
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,8191,0.12117866675059001
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,8191,0.1672266721725464
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,8191,0.15124266346295676
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,8191,0.19247466325759888
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,8191,0.15145066380500793
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,8191,0.15321066975593567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,8191,0.19282132387161255
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,8191,0.13405332962671915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,8191,0.19322667519251505
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,8191,0.15018133322397867
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,8191,0.1567093332608541
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,8191,0.1920213301976522
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,8191,0.15339199701944986
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,8191,0.19234667221705118
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,8191,0.09819199641545613
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,8191,0.13190399607022604
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,8191,0.15324800213178
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,8191,0.19195199012756348
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,8191,0.128629336754481
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,8191,0.1942453384399414
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,8191,0.14546666542689005
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,8191,0.19961067040761313
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,8191,0.14707733194033304
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,8191,0.16222932934761047
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,8191,0.151829332113266
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,8191,0.1946400006612142
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,8191,0.1188106636206309
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,8191,0.19021334250768027
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,8191,0.1493333379427592
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,8191,0.19900800784428915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,8191,0.15310399731000265
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,8191,0.11995733777681987
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,8191,0.13868266344070435
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,8191,0.12793599565823874
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,8191,0.19229867060979208
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,8191,0.1345866620540619
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,8191,0.19429866472880045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,8191,0.20100265741348267
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,8191,0.14918399850527445
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,8191,0.19309866428375244
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,8191,0.13012799620628357
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,8191,0.18343466520309448
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,8191,0.16062399744987488
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,8191,0.1507146656513214
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,8191,0.15524799625078836
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,8191,0.1513653298219045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,8191,0.2020533283551534
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,8191,0.17915199200312296
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,8191,0.16989866892496744
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,8191,0.1717546582221985
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,8191,0.1571466624736786
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,8191,0.16516799728075662
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,8191,0.20307199160257974
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,8191,0.1558133363723755
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,8191,0.1637173295021057
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,8191,0.19801066319147745
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,8191,0.17283733685811362
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,8191,0.20097599426905313
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,8191,0.17119999726613364
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,8191,0.15556800365447998
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,8191,0.16115199526151022
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,8191,0.16381866733233133
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,8191,0.2791999975840251
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,8191,0.27058132489522296
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,8191,0.2832373380661011
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,8191,0.27702399094899494
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,8191,0.2646026611328125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,8191,0.27012266715367633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,8191,0.2535093426704407
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,8191,0.2504693269729614
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,8191,0.2616693377494812
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,8191,0.2551093300183614
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,8191,0.2598399917284648
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,8191,0.524895985921224
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,8191,0.25065066417058307
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,8191,0.29867732524871826
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,8191,0.2486506700515747
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,8191,0.25730667511622113
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,8191,0.24709333976109824
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,8191,0.512880007425944
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,8191,0.45632533232371014
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,8191,0.4683786630630493
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,8191,0.4426826635996501
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,8191,0.4605226516723633
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,8191,0.4363466501235962
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,8191,0.44677333037058514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,8191,0.4310986598332723
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,8191,0.4508800109227498
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,8191,0.4328800042470296
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,8191,0.43906132380167645
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,8191,0.43168532848358154
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,8191,0.44648532072703045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,8191,0.4315733512242635
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,8191,0.44167999426523846
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,8191,0.9966986974080404
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,8191,0.9487786293029785
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,8191,0.862506628036499
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,8191,0.8650453090667725
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,8191,0.8444266319274902
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,8191,0.850325345993042
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,8191,0.818447987238566
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,8191,0.8290613492329916
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,8191,0.8313706715901693
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,8191,0.8225706418355306
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,8191,0.8358613650004069
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,8191,0.8200639883677164
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,8191,0.817471981048584
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,8191,1.7081386248270671
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,8191,0.82369065284729
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,8191,0.8148852984110514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,8191,0.817466656366984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,8191,2.0016372998555503
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,8191,1.8152906099955242
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,8191,1.6001332600911458
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,8191,1.693178653717041
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,8191,1.651594638824463
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,8191,1.6354079246520996
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,8191,1.6140373547871907
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,8191,1.5834719340006511
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,8191,1.5833919843037922
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,8191,1.5901439984639485
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,8191,1.5761119524637859
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,8191,1.5854026476542156
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,8191,1.5861013730367024
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,8191,1.5805974006652832
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,8191,1.5747307141621907
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,8191,3.6528746287027993
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,8191,3.574885368347168
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,8191,3.217205365498861
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,8191,3.3390026092529297
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,8191,3.1572319666544595
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,8191,3.208826700846354
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,8191,3.120933214823405
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,8191,3.138773282368978
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,8191,3.1090453465779624
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,8191,3.1149333318074546
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,8191,3.096885363260905
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,8191,3.100501378377279
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,8191,3.0946133931477866
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,8191,3.1009012858072915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,16383,0.15557333827018738
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,8191,3.0856800079345703
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,8191,3.121530532836914
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,16383,0.09821866949399312
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,16383,0.1309279998143514
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,16383,0.14969600240389505
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,16383,0.15288000305493674
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,16383,0.1545973320802053
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,16383,0.12463999787966411
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,16383,0.15247467160224915
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,16383,0.11826133728027344
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,16383,0.1996799906094869
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,16383,0.1285920043786367
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,16383,0.19954133033752441
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,16383,0.15572266777356467
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,16383,0.16109333435694376
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,16383,0.12742400169372559
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,16383,0.20163732767105103
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,16383,0.15920000274976095
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,16383,0.09909866253534953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,16383,0.12614400188128153
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,16383,0.13275200128555298
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,16383,0.1328480045000712
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,16383,0.1556426684061686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,16383,0.14177067081133524
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,16383,0.12809600432713827
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,16383,0.19881065686543783
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,16383,0.1313920021057129
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,16383,0.16060800353686014
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,16383,0.1539359986782074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,16383,0.15869333346684775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,16383,0.1275200049082438
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,16383,0.15530133247375488
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,16383,0.14841600259145102
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,16383,0.10622933506965637
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,16383,0.12739200393358865
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,16383,0.12541332840919495
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,16383,0.15577600399653116
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,16383,0.15680000185966492
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,16383,0.12256532907485962
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,16383,0.16261866688728333
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,16383,0.12442666292190552
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,16383,0.1905919909477234
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,16383,0.15712533394495645
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,16383,0.15350932876269022
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,16383,0.1509066621462504
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,16383,0.13184000054995218
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,16383,0.15271466970443726
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,16383,0.1527733306090037
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,16383,0.15355733036994934
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,16383,0.15256533026695251
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,16383,0.11498133341471355
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,16383,0.1548853317896525
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,16383,0.15773333112398782
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,16383,0.16249066591262817
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,16383,0.1957706610361735
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,16383,0.12690666317939758
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,16383,0.20528000593185425
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,16383,0.12551466623942056
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,16383,0.19334399700164795
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,16383,0.19871467351913452
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,16383,0.15427733461062113
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,16383,0.15729600191116333
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,16383,0.16460800170898438
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,16383,0.19268266359965006
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,16383,0.12425600488980611
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,16383,0.17441066106160483
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,16383,0.1755146582921346
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,16383,0.16990399360656738
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,16383,0.19960532585779825
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,16383,0.17456533511479697
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,16383,0.20808533827463785
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,16383,0.16662933429082236
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,16383,0.16156267126401266
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,16383,0.15306133031845093
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,16383,0.2017013430595398
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,16383,0.17733333508173624
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,16383,0.19913599888483682
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,16383,0.15361600120862326
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,16383,0.20641066630681357
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,16383,0.1687999963760376
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,16383,0.20298133293787637
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,16383,0.2840053240458171
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,16383,0.2791840036710103
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,16383,0.28883200883865356
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,16383,0.26758400599161786
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,16383,0.27219732602437335
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,16383,0.2593173384666443
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,16383,0.252895991007487
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,16383,0.26794666051864624
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,16383,0.2497546672821045
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,16383,0.26224533716837567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,16383,0.24810665845870972
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,16383,0.2509066661198934
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,16383,0.29734400908152264
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,16383,0.26096532742182416
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,16383,0.4652213255564372
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,16383,0.24998400608698526
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,16383,0.2654239932696025
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,16383,0.4869973262151082
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,16383,0.4880373477935791
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,16383,0.4986720085144043
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,16383,0.4864426851272583
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,16383,0.45240533351898193
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,16383,0.47958401838938397
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,16383,0.4463520050048828
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,16383,0.5568906863530477
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,16383,0.4330293337504069
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,16383,0.4755573272705078
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,16383,0.44254934787750244
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,16383,0.5524906714757284
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,16383,0.4426453510920207
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,16383,0.8379413286844889
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,16383,0.4705066680908203
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,16383,0.44331200917561847
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,16383,0.9297173023223877
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,16383,0.8790720303853353
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,16383,0.8314613501230875
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,16383,0.8248906930287679
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,16383,0.834287961324056
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,16383,0.8202453454335531
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,16383,0.8148693243662516
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,16383,0.8249813715616862
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,16383,0.8080693085988363
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,16383,0.8155626455942789
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,16383,0.829802672068278
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,16383,0.8142186800638834
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,16383,0.8127146561940511
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,16383,0.8196907043457031
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,16383,0.8075413703918457
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,16383,1.8231840133666992
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,16383,1.696730613708496
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,16383,1.6199626922607422
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,16383,1.6487733523050945
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,16383,1.5986453692118328
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,16383,1.6000320116678874
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,16383,1.583621342976888
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,16383,1.5723786354064941
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,16383,1.5596373875935872
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,16383,1.56604798634847
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,16383,1.5791786511739094
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,16383,1.5647999445597331
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,16383,1.5722667376200359
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,16383,1.576602617899577
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,16383,1.5637173652648926
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,16383,1.5499359766642253
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,16383,3.117087999979655
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,16383,3.653898557027181
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,16383,3.331077257792155
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,16383,3.063098589579264
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,16383,3.2313013076782227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,16383,3.184816042582194
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,16383,3.1633227666219077
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,16383,3.1333068211873374
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,16383,3.1010611852010093
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,16383,3.11733341217041
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,16383,3.0517600377400718
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,16383,3.0699892044067383
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,16383,3.110549290974935
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,16383,3.0927734375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,16383,3.090511957804362
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,16383,3.0724051793416343
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1024,1,1,16383,7.004650751749675
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1024,1,1,16383,6.542197545369466
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1024,1,2,16383,6.260965347290039
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1024,1,2,16383,6.267365137736003
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1024,1,4,16383,6.200693130493164
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1024,1,4,16383,6.185397466023763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1024,1,8,16383,6.167418797810872
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1024,1,8,16383,6.107610702514648
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1024,1,16,16383,6.153258641560872
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1024,1,16,16383,6.045647939046224
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1024,1,32,16383,6.124597549438477
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1024,1,32,16383,6.149909337361653
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1024,1,64,16383,6.145498911539714
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,32767,0.15108799934387207
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,32767,0.19960000117619833
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1024,1,64,16383,6.1341813405354815
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1024,1,128,16383,6.123290379842122
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1024,1,128,16383,6.121610641479492
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,32767,0.10165866216023763
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,32767,0.11929600437482198
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,32767,0.122789333264033
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,32767,0.12043733398119609
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,32767,0.1937119960784912
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,32767,0.16107733050982156
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,32767,0.15185067057609558
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,32767,0.1954453388849894
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,32767,0.13051733374595642
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,32767,0.1573919951915741
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,32767,0.15639467040697733
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,32767,0.16142933567365012
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,32767,0.19715734322865805
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,32767,0.15377599994341531
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,32767,0.20578666528066
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,32767,0.15265599886576334
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,32767,0.10751466949780782
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,32767,0.1285706659158071
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,32767,0.13289599617322287
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,32767,0.15735466281572977
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,32767,0.15685333808263144
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,32767,0.19200533628463745
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,32767,0.16736533244450888
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,32767,0.19539199272791544
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,32767,0.1911946733792623
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,32767,0.12812266747156778
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,32767,0.19583467642466226
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,32767,0.13796266913414001
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,32767,0.1955146590868632
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,32767,0.11886933445930481
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,32767,0.122079998254776
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,32767,0.13331199685732523
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,32767,0.15504533052444458
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,32767,0.1566933294137319
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,32767,0.14087466398874918
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,32767,0.16671466827392578
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,32767,0.16641599933306375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,32767,0.15481066703796387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,32767,0.16178133090337118
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,32767,0.20189867417017618
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,32767,0.15586666266123453
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,32767,0.16013333201408386
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,32767,0.1218986709912618
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,32767,0.16302399833997092
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,32767,0.13075733184814453
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,32767,0.1525813341140747
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,32767,0.17362666130065918
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,32767,0.17239999771118164
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,32767,0.16820800304412842
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,32767,0.2051466703414917
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,32767,0.17215466499328613
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,32767,0.16519999504089355
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,32767,0.1740000049273173
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,32767,0.1725119948387146
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,32767,0.16327466567357382
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,32767,0.176581343015035
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,32767,0.20347734292348227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,32767,0.20805333058039346
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,32767,0.17266666889190674
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,32767,0.19843200842539468
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,32767,0.15366933743158975
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,32767,0.16596266627311707
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,32767,0.28138667345046997
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,32767,0.27129600445429486
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,32767,0.2483839988708496
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,32767,0.2616746624310811
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,32767,0.27366934219996136
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,32767,0.2595413327217102
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,32767,0.26657066742579144
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,32767,0.25383466482162476
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,32767,0.2638186613718669
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,32767,0.24742400646209717
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,32767,0.24508267641067505
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,32767,0.2648266752560933
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,32767,0.24605866273244223
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,32767,0.2622133294741313
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,32767,0.4545706510543823
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,32767,0.24659200509389242
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,32767,0.2630400061607361
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,32767,0.47781864802042645
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,32767,0.4910879929860433
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,32767,0.4655359983444214
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,32767,0.529637336730957
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,32767,0.49399999777475995
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,32767,0.44022401173909503
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,32767,0.4410080115000407
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,32767,0.4808053175608317
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,32767,0.4378559986750285
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,32767,0.47909867763519287
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,32767,0.43386133511861164
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,32767,0.47605868180592853
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,32767,0.9335520267486572
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,32767,0.4346986611684163
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,32767,0.47974932193756104
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,32767,0.4360426664352417
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,32767,0.8942879835764567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,32767,0.8638933499654134
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,32767,0.8253386815388998
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,32767,0.9093333085378011
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,32767,0.898570696512858
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,32767,0.8102186520894369
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,32767,0.8203413486480713
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,32767,0.9291093349456787
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,32767,0.8107626438140869
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,32767,0.9102880160013834
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,32767,0.8218613465627035
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,32767,1.0603626569112141
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,32767,0.8123253186543783
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,32767,1.6013654073079426
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,32767,0.8944799900054932
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,32767,1.5702133178710938
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,32767,0.8120266596476237
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,32767,1.7979307174682617
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,32767,1.6184159914652507
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,32767,1.55839999516805
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,32767,1.5686826705932617
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,32767,1.5460160573323567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,32767,1.592016061147054
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,32767,1.557146708170573
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,32767,1.579525311787923
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,32767,1.5782987276713054
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,32767,1.548810640970866
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,32767,1.5723679860432942
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,32767,3.1449813842773438
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,32767,1.5715413093566895
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,32767,1.568463961283366
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,32767,1.5551679929097493
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,32767,3.4765332539876304
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,32767,3.1590026219685874
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,32767,3.0885868072509766
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,32767,3.1215572357177734
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,32767,3.0538241068522134
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,32767,3.101477305094401
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,32767,3.040309270222982
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,32767,3.099029223124186
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,32767,3.0809332529703775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,32767,3.1016852060953775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,32767,3.0293973286946616
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,32767,3.0951573053995767
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,32767,3.0535147984822593
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,512,1,1,32767,7.013013203938802
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,32767,3.0914827982584634
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,32767,3.070058822631836
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,512,1,2,32767,6.267045338948567
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,512,1,2,32767,6.111114501953125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,512,1,4,32767,6.225200017293294
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,512,1,4,32767,6.0675309499104815
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,512,1,8,32767,6.172538757324219
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,512,1,1,32767,6.214058558146159
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,512,1,8,32767,6.020698547363281
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,512,1,16,32767,6.154853185017903
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,512,1,16,32767,6.107605616251628
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,512,1,128,32767,6.1264692942301435
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,512,1,32,32767,6.121941248575847
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,512,1,32,32767,6.093135833740234
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,512,1,64,32767,6.123130798339844
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,512,1,64,32767,6.031930923461914
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,65535,0.12931733330090842
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,65535,0.10976533095041911
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,512,1,128,32767,5.990805308024089
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,65535,0.13165332873662314
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,65535,0.12802666425704956
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,65535,0.20245333512624106
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,65535,0.15898133317629495
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,65535,0.19844800233840942
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,65535,0.12786666552225748
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,65535,0.199946661790212
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,65535,0.16152000427246094
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,65535,0.12970133622487387
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,65535,0.15596800049146017
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,65535,0.15082666277885437
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,65535,0.16125333309173584
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,65535,0.15621866782506308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,65535,0.15522666772206625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,65535,0.12770666678746542
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,65535,0.12814399600028992
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,65535,0.12807466586430868
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,65535,0.16661866505940756
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,65535,0.15899733702341715
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,65535,0.20316267013549805
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,65535,0.1593546668688456
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,65535,0.15871466199556986
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,65535,0.13079999883969626
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,65535,0.16214399536450705
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,65535,0.13725333412488303
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,65535,0.16879467169443765
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,65535,0.15544000267982483
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,65535,0.20232532421747842
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,65535,0.15967999895413718
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,65535,0.1591039995352427
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,65535,0.1780853271484375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,65535,0.1741066575050354
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,65535,0.16793600718180338
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,65535,0.1732800006866455
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,65535,0.16828266779581705
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,65535,0.18330667416254678
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,65535,0.2130933403968811
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,65535,0.1824373404184977
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,65535,0.1620213290055593
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,65535,0.15908799568812051
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,65535,0.1604320009549459
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,65535,0.16713599363962808
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,65535,0.17919466892878214
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,65535,0.19745065768559775
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,65535,0.25731732447942096
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,65535,0.17148800690968832
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,65535,0.20664532979329428
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,65535,0.2784213423728943
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,65535,0.26950399080912274
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,65535,0.306442658106486
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,65535,0.27104000250498456
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,65535,0.24754667282104492
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,65535,0.26201067368189496
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,65535,0.2507733305295308
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,65535,0.30109866460164386
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,65535,0.24600533644358316
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,65535,0.26128532489140827
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,65535,0.24570665756861368
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,65535,0.3012106617291768
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,65535,0.24184000492095947
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,65535,0.2599253257115682
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,65535,0.245194673538208
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,65535,0.4883626699447632
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,65535,0.46082135041554767
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,65535,0.48799999554951984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,65535,0.44961599508921307
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,65535,0.48924267292022705
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,65535,0.4328266779581706
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,65535,0.4788373311360677
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,65535,0.4348906675974528
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,65535,0.47915200392405194
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,65535,0.4344480037689209
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,65535,0.42924801508585614
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,65535,0.553706685702006
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,65535,0.43749332427978516
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,65535,0.47594134012858075
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,65535,0.4291306734085083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,65535,0.4764106671015422
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,65535,0.9165333112080892
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,65535,0.8460906346638998
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,65535,0.9620479742685953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,65535,0.8193759918212891
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,65535,0.9388319651285807
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,65535,0.8158506552378336
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,65535,0.9046826362609863
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,65535,0.9091893037160238
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,65535,0.8161066373189291
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,65535,0.816490650177002
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,65535,0.9061760107676188
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,65535,0.8124159971872965
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,65535,1.6119893391927083
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,65535,1.0618346532185872
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,65535,0.8072266578674316
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,65535,0.9050347010294596
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,65535,0.813162644704183
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,65535,1.7216213544209797
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,65535,1.7596960067749023
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,65535,1.585861365000407
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,65535,1.8608160018920898
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,65535,1.5493706067403157
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,65535,1.765386740366618
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,65535,1.5426613489786785
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,65535,1.5431413650512695
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,65535,1.7519520123799641
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,65535,1.5504213968912761
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,65535,1.7502719561258953
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,65535,1.55184539159139
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,65535,1.7584800720214844
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,65535,1.7498559951782227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,65535,1.5397599538167317
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,65535,3.390202522277832
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,65535,3.1186720530192056
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,65535,3.1191040674845376
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,65535,3.0795679092407227
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,65535,3.104037284851074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,65535,3.0640106201171875
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,65535,3.100010553995768
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,65535,3.0281012852986655
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,65535,3.096416155497233
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,65535,3.078890800476074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,65535,3.0969387690226235
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,65535,3.033328056335449
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,65535,3.092874526977539
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,65535,3.0530878702799478
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,65535,3.1031147638956704
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,65535,3.017754554748535
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,256,1,1,65535,6.7805970509847
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,256,1,1,65535,6.193146387736003
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,256,1,2,65535,6.1898454030354815
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,256,1,2,65535,6.052709579467773
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,256,1,4,65535,6.172730763753255
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,256,1,4,65535,6.10261344909668
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,256,1,8,65535,6.1488800048828125
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,256,1,32,65535,6.143952051798503
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,256,1,8,65535,6.041343688964844
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,256,1,16,65535,6.132752100626628
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,256,1,16,65535,6.08238410949707
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,256,1,32,65535,5.978928248087565
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,256,1,64,65535,6.134042739868164
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,256,1,64,65535,6.023183822631836
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,256,1,128,65535,6.124154408772786
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,256,1,128,65535,5.985418955485026
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,1,1,1,131071,0.1561973293622335
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,1,1,1,131071,0.1365013321240743
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,1,1,2,131071,0.16931732495625815
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,1,1,2,131071,0.20043732722600302
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,1,1,4,131071,0.1529813309510549
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,1,1,4,131071,0.16365866859753928
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,1,1,8,131071,0.14889066418011984
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,1,1,8,131071,0.19558932383855185
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,1,1,16,131071,0.14548266927401224
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,1,1,16,131071,0.16538133223851523
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,1,1,32,131071,0.172325332959493
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,1,1,32,131071,0.19647467136383057
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,1,1,64,131071,0.1502293348312378
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,2,1,2,131071,0.1776533325513204
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,1,1,64,131071,0.16356266538302103
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,1,1,128,131071,0.14688533544540405
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,1,1,128,131071,0.19630932807922363
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,2,1,1,131071,0.18264534076054892
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,2,1,1,131071,0.17502933740615845
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,2,1,2,131071,0.2036693294843038
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,2,1,4,131071,0.18251200517018637
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,2,1,4,131071,0.21077332894007364
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,2,1,32,131071,0.2044480045636495
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,2,1,8,131071,0.18206934134165445
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,2,1,8,131071,0.17220799128214517
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,2,1,16,131071,0.16597867012023926
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,2,1,16,131071,0.20283200343449911
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,2,1,32,131071,0.1604746679464976
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,2,1,64,131071,0.16125333309173584
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,4,1,2,131071,0.285045325756073
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,2,1,64,131071,0.20693333943684897
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,2,1,128,131071,0.1683893402417501
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,2,1,128,131071,0.17407999436060587
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,4,1,1,131071,0.29074132442474365
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,4,1,1,131071,0.2717546621958415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,4,1,2,131071,0.25576533873875934
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,4,1,4,131071,0.3170986572901408
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,4,1,4,131071,0.25176000595092773
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,4,1,8,131071,0.3174026608467102
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,4,1,64,131071,0.3144853313763936
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,4,1,64,131071,0.24770132700602213
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,4,1,8,131071,0.2502506573994954
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,4,1,16,131071,0.31491732597351074
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,4,1,16,131071,0.2519306739171346
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,4,1,32,131071,0.2809866666793823
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,4,1,32,131071,0.24475733439127603
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,4,1,128,131071,0.27695999542872113
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,4,1,128,131071,0.24648000796635947
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,8,1,1,131071,0.48230401674906415
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,8,1,1,131071,0.46002666155497235
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,8,1,2,131071,0.4894239902496338
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,8,1,2,131071,0.44809067249298096
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,8,1,4,131071,0.47750401496887207
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,8,1,4,131071,0.4333440065383911
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,8,1,8,131071,0.5586453278859457
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,8,1,8,131071,0.4366453488667806
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,8,1,16,131071,0.557151993115743
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,8,1,16,131071,0.43511998653411865
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,8,1,32,131071,0.5523519913355509
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,8,1,32,131071,0.43928531805674237
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,8,1,64,131071,0.5521706740061442
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,8,1,64,131071,0.42718398571014404
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,16,1,2,131071,1.0657440026601155
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,16,1,2,131071,0.8223520119984945
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,16,1,4,131071,0.8951679865519205
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,8,1,128,131071,0.4802773396174113
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,8,1,128,131071,0.43411731719970703
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,16,1,1,131071,0.9059840043385824
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,16,1,1,131071,0.8338879744211832
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,16,1,32,131071,0.9085919857025146
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,16,1,32,131071,0.8022613525390625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,16,1,4,131071,0.8038453261057535
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,16,1,64,131071,0.9069279829661051
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,16,1,64,131071,0.8074346383412679
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,16,1,128,131071,0.9079573154449463
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,16,1,8,131071,0.9060373306274414
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,16,1,8,131071,0.8167839845021566
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,16,1,16,131071,0.8995306491851807
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,16,1,16,131071,0.8155573209126791
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,16,1,128,131071,0.7964213689168295
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,32,1,1,131071,1.754207928975423
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,32,1,1,131071,1.5976373354593914
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,32,1,2,131071,1.8135466575622559
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,32,1,2,131071,1.5664374033610027
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,32,1,16,131071,1.5628852844238281
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,32,1,4,131071,1.9192426999409993
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,32,1,4,131071,1.5439893404642742
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,32,1,8,131071,1.8098613421122234
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,32,1,8,131071,1.537882645924886
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,32,1,128,131071,1.749567985534668
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,32,1,16,131071,1.7649760246276855
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,32,1,32,131071,2.0771306355794272
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,32,1,32,131071,1.5528799692789714
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,32,1,64,131071,1.7447199821472168
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,32,1,64,131071,1.562837282816569
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,32,1,128,131071,1.5645653406778972
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,64,1,1,131071,3.355519930521647
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,64,1,1,131071,3.0770400365193686
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,64,1,2,131071,3.676581382751465
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,64,1,2,131071,3.0650879542032876
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,64,1,4,131071,3.876309394836426
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,64,1,4,131071,3.0543254216512046
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,64,1,8,131071,3.4535627365112305
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,64,1,8,131071,3.0490239461263022
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,64,1,16,131071,3.4509385426839194
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,64,1,16,131071,3.0176798502604165
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,64,1,32,131071,3.463024139404297
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,64,1,128,131071,3.064271926879883
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,64,1,32,131071,3.0445067087809243
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,64,1,64,131071,4.075285275777181
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,64,1,64,131071,3.069941202799479
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,64,1,128,131071,3.414112091064453
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,128,128,1,1,131071,6.714986801147461
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,128,128,1,1,131071,6.032304128011067
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,64,128,1,2,131071,6.166757583618164
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,64,128,1,2,131071,6.075466791788737
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,8,128,1,16,131071,6.140309015909831
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,32,128,1,4,131071,6.1549224853515625
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,32,128,1,4,131071,5.991749445597331
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,16,128,1,8,131071,6.141450881958008
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,16,128,1,8,131071,6.076000213623047
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,8,128,1,16,131071,5.980325063069661
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,4,128,1,32,131071,6.137461344401042
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,4,128,1,32,131071,6.010837554931641
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,2,128,1,64,131071,6.138250350952148
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,2,128,1,64,131071,5.970794677734375
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flash_attn_mla,float16,float16,1,128,1,128,131071,6.1383412679036455
VLLM,0.14.0,NVIDIA H100 80GB HBM3,generation_mla,vllm_flashmla,float16,fp8,1,128,1,128,131071,6.02504030863444
