framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,1,4,0,0.5023519992828369
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,1,2,0,0.458026647567749
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,1,8,0,0.32867733637491864
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,1,2,0,0.49798401196797687
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,1,1,0,0.5586986541748047
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,1,4,0,0.33214400211970013
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,1,8,0,0.36050665378570557
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,1,32,0,0.334170659383138
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,1,16,0,0.3489813407262166
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,1,16,0,0.5581653515497843
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,1,32,0,0.5439786513646444
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,1,1,0,0.3498239914576213
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,1,1,0,0.38812267780303955
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,1,4,0,0.31985066334406537
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,1,2,0,0.34652264912923175
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,1,8,0,0.35569600264231366
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,1,16,0,0.34669331709543866
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,1,32,0,0.3503893216451009
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,1,1,0,0.3496319850285848
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,1,2,0,0.3211626609166463
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,1,8,0,0.3465706507364909
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,1,4,0,0.35097066561381024
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,1,16,0,0.3521546522776286
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,1,32,0,0.3125813404719035
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,1,64,0,0.3102133274078369
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,1,64,0,0.3125866651535034
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,1,64,0,0.35917333761850995
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,1,1,0,0.34031999111175537
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,1,4,0,0.368613322575887
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,1,2,0,0.3471946716308594
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,1,8,0,0.46329601605733234
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,1,16,0,0.3510613441467285
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,1,32,0,0.34043200810750324
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,1,64,0,0.4766453504562378
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,1,64,0,0.3500800132751465
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,32,1,1,0,0.35450132687886554
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,32,1,4,0,0.3238240083058675
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,32,1,2,0,0.4326186577479045
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,32,1,8,0,0.4169813394546509
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,32,1,16,0,0.34513600667317706
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,32,1,32,0,0.4082239866256714
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,32,1,64,0,0.3892853260040283
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,64,1,1,0,0.3087093234062195
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,64,1,2,0,0.3141813278198242
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,64,1,4,0,0.3044426639874776
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,64,1,8,0,0.33618664741516113
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,64,1,16,0,0.3048906723658244
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,64,1,32,0,0.3384693463643392
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,64,1,64,0,0.3329813281695048
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,128,1,1,0,0.3076373338699341
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,128,1,2,0,0.30850134293238324
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,128,1,4,0,0.3415733178456624
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,1,128,0,0.5759893258412679
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,1,128,0,0.5324960152308146
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,128,1,8,0,0.30320000648498535
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,128,1,64,0,0.33389333883921307
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,128,1,16,0,0.3139253258705139
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,64,1,128,0,0.50764266649882
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,128,1,32,0,0.31173866987228394
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,32,1,128,0,0.43942399819691974
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,128,1,128,0,0.30612266063690186
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,256,1,4,0,0.3989439805348714
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,256,1,2,0,0.3130613366762797
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,256,1,1,0,0.6725172996520996
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,256,1,8,0,0.3471946716308594
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,256,1,16,0,0.33346132437388104
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,16,1,0,0.25380800167719525
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,16,2,0,0.41923733552296955
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,16,8,0,0.2845279971758525
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,256,1,128,0,0.3749973376592
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,256,1,32,0,0.35253334045410156
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,16,4,0,0.2796853383382161
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,256,1,64,0,0.3575040102005005
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,16,16,0,0.28191999594370526
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,16,32,0,0.2799253265062968
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,16,128,0,0.26337067286173504
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,16,64,0,2.027520020802816
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,16,2,0,0.278165340423584
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,16,4,0,0.28651199738184613
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,16,1,0,0.28015466531117755
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,16,32,0,0.2836586634318034
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,16,8,0,0.2775893410046895
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,16,64,0,0.28014399607976276
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,16,16,0,1.0168053309122722
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,16,128,0,0.2653759916623433
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,16,1,0,0.2802240053812663
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,16,2,0,0.28146666288375854
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,16,8,0,0.2948426604270935
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,16,16,0,0.2786826690038045
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,16,4,0,0.2622133294741313
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,16,128,0,0.26495466629664105
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,16,32,0,2.3231147130330405
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,16,64,0,0.28728000322977704
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,16,2,0,0.2698773344357808
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,16,1,0,0.29497599601745605
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,16,4,0,0.276853342851003
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,16,8,0,0.2779039939244588
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,16,16,0,0.2797279953956604
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,16,32,0,0.28101332982381183
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,16,128,0,0.2493706742922465
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,16,64,0,0.2834453384081523
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,16,4,0,0.2773279945055644
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,16,8,0,0.2815893292427063
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,16,1,0,0.280623992284139
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,16,2,0,0.25677865743637085
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,16,16,0,0.27799999713897705
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,16,32,0,0.2749759952227275
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,16,64,0,0.2892213265101115
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,16,128,0,0.24314665794372559
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,32,16,4,0,0.2769813338915507
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,32,16,2,0,0.2804800073305766
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,32,16,1,0,0.40626664956410724
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,32,16,8,0,0.2826240062713623
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,32,16,16,0,0.27194666862487793
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,1,128,0,0.341594656308492
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,32,16,64,0,0.3174560070037842
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,32,16,32,0,0.28119999170303345
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,32,16,128,0,0.2618666688601176
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,64,16,1,0,0.9461759726206461
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,64,16,8,0,0.2778506676355998
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,64,16,4,0,0.28861866394678753
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,64,16,16,0,0.27526400486628216
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,64,16,2,0,0.3724906841913859
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,1,128,0,0.33803733189900714
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,64,16,128,0,0.2596106727917989
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,64,16,32,0,0.2732800046602885
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,64,16,64,0,0.28415467341740924
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,128,16,2,0,0.926736036936442
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,128,16,4,0,0.36372800668080646
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,1,128,0,0.346837321917216
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,128,16,32,0,0.27512532472610474
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,128,16,1,0,2.129530588785807
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,128,16,8,0,0.2839360038439433
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,128,16,16,0,0.2571946581204732
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,128,16,64,0,0.27564799785614014
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,128,16,128,0,0.2566240032513936
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,256,16,8,0,0.4099786678949992
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,256,16,32,0,0.2873493234316508
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,256,16,64,0,0.2807360092798869
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,256,16,16,0,0.27985600630442303
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,256,16,128,0,0.2632213234901428
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,32,1,0,1.4079945882161458
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,256,16,4,0,0.8924533526102701
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,32,2,0,0.2827039957046509
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,32,4,0,0.27115732431411743
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,32,8,0,0.2550719976425171
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,256,16,2,0,2.1120479901631675
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,32,32,0,0.2760000030199687
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,256,16,1,0,4.382346789042155
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,32,16,0,1.275386651357015
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,32,64,0,1.359114646911621
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,32,128,0,0.2743893265724182
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,32,1,0,0.28462932507197064
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,32,2,0,0.2799359957377116
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,32,4,0,0.28165332476298016
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,32,8,0,0.8263200124104818
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,32,64,0,0.2864426573117574
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,32,16,0,0.29151999950408936
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,32,32,0,0.28003732363382977
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,32,2,0,0.27435733874638873
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,32,128,0,0.9144213199615479
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,32,1,0,0.26267733176549274
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,32,4,0,0.25753599405288696
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,32,8,0,0.2825919985771179
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,32,16,0,0.27826132376988727
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,32,32,0,0.6377546787261963
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,32,64,0,0.27926933765411377
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,32,128,0,0.2624266743659973
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,32,2,0,0.27811199426651
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,32,4,0,0.3306079904238383
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,32,1,0,0.2834773262341817
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,32,16,0,0.7115200360616049
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,32,128,0,0.2706186572710673
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,32,64,0,0.28699199358622235
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,32,8,0,0.285866657892863
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,32,32,0,0.2825760046641032
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,32,2,0,0.2805866599082947
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,32,1,0,0.36899201075236004
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,32,4,0,0.26022932926813763
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,32,32,0,0.28546667098999023
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,32,64,0,0.2768106659253438
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,32,8,0,0.28334399064381915
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,32,128,0,0.2458720008532206
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,32,16,0,0.2798080046971639
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,32,32,1,0,0.9552373091379801
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,32,32,4,0,0.2847999930381775
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,32,32,2,0,0.3469333251317342
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,32,32,32,0,0.2778186599413554
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,32,32,64,0,0.2863253355026245
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,32,32,8,0,0.27693865696589154
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,32,32,16,0,0.283786674340566
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,32,32,128,0,0.28356266021728516
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,64,32,2,0,0.9261439641316732
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,64,32,4,0,0.3352266550064087
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,64,32,8,0,0.2890453338623047
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,64,32,32,0,0.27957866589228314
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,64,32,16,0,0.8554346561431885
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,64,32,64,0,0.27686933676401776
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,64,32,1,0,2.149120012919108
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,64,32,128,0,0.26132800181706745
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,128,32,8,0,0.3977760076522827
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,128,32,4,0,0.9282933076222738
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,128,32,16,0,0.27000532547632855
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,128,32,2,0,2.127770741780599
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,128,32,32,0,0.26871999104817706
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,128,32,1,0,4.403461456298828
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,128,32,64,0,0.28242133061091107
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,128,32,128,0,0.2646453380584717
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,256,32,8,0,0.9167839686075846
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,256,32,16,0,0.38254932562510174
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,256,32,32,0,0.26239466667175293
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,256,32,4,0,2.074410597483317
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,256,32,2,0,4.351600011189778
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,64,2,0,0.2839146653811137
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,64,1,0,0.2781386574109395
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,256,32,1,0,8.944069544474283
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,64,4,0,0.27897600332895917
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,64,8,0,0.281061331431071
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,64,16,0,0.290608008702596
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,64,32,0,0.27888532479604083
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,256,32,128,0,0.2706666588783264
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,64,64,0,0.3022186756134033
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,256,32,64,0,1.3317386309305828
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,64,128,0,0.24469866355260214
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,64,4,0,0.284277339776357
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,64,2,0,0.25408534208933514
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,64,1,0,0.2629866600036621
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,64,16,0,0.27585067351659137
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,64,64,0,0.30569066603978473
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,64,128,0,0.26844267050425213
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,64,32,0,0.7497173150380453
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,64,8,0,0.2909333308537801
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,64,2,0,0.3028106689453125
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,64,1,0,0.288373331228892
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,64,4,0,0.28123732407887775
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,64,32,0,0.2799359957377116
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,64,16,0,0.3031040032704671
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,64,8,0,0.2776906689008077
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,64,128,0,0.2882240017255147
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,64,64,0,0.32524800300598145
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,64,4,0,0.28465066353480023
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,64,1,0,0.37390931447347003
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,64,2,0,0.26442132393519086
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,64,8,0,0.2813173333803813
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,64,32,0,1.0978506406148274
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,64,64,0,0.2829226652781169
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,64,16,0,0.28651199738184613
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,64,128,0,0.2659839987754822
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,64,2,0,0.3453226486841838
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,64,4,0,0.2860640088717143
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,64,1,0,0.9337653319040934
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,64,8,0,0.2777973413467407
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,64,16,0,0.25805334250132245
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,64,64,0,0.27291200558344525
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,64,32,0,0.2808799942334493
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,64,128,0,0.2625760038693746
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,32,64,4,0,0.3320533235867818
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,32,64,2,0,0.9258293310801188
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,32,64,1,0,2.1925226847330728
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,32,64,16,0,1.5742932955423992
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,32,64,64,0,0.2842613259951274
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,32,64,32,0,0.29443200429280597
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,32,64,8,0,0.2978773315747579
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,32,64,128,0,0.2680000066757202
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,64,64,8,0,0.39131200313568115
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,64,64,4,0,0.8942826588948568
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,64,64,1,0,4.456464131673177
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,64,64,16,0,0.3124799927075704
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,64,64,32,0,0.28068800767262775
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,64,64,64,0,0.28171734015146893
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,64,64,2,0,2.119210720062256
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,64,64,128,0,0.2453920046488444
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,128,64,16,0,0.402890682220459
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,128,64,32,0,0.29039466381073
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,128,64,8,0,0.9022400379180908
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,128,64,4,0,2.082207997639974
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,128,64,64,0,0.29130667448043823
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,128,64,128,0,0.26389867067337036
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,128,64,2,0,4.3314774831136065
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,128,64,1,0,8.916997273763021
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,256,64,8,0,2.142752011617025
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,256,64,32,0,0.44179733594258624
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,256,64,16,0,0.9494773546854655
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,128,2,0,0.2760639985402425
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,128,4,0,0.2674720088640849
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,128,1,0,0.29952534039815265
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,256,64,4,0,4.3049014409383135
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,128,8,0,0.27719465891520184
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,256,64,2,0,8.81059201558431
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,128,64,0,0.2939573327700297
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,128,32,0,0.27955732742945355
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,128,16,0,0.27803200483322144
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,256,64,64,0,0.2624533375104268
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,256,64,128,0,0.2947946588198344
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,128,128,0,0.26739199956258136
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,128,4,0,0.28894933064778644
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,128,8,0,0.9284853140513102
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,128,16,0,0.3184640010197957
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,128,1,0,0.30505067110061646
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,256,64,1,0,17.970357259114582
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,128,2,0,0.27832533915837604
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,128,32,0,0.2802186608314514
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,128,4,0,0.2802506685256958
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,128,64,0,0.28570665915807086
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,128,8,0,0.2845653295516968
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,128,16,0,0.28756799300511676
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,128,1,0,0.3733439842859904
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,128,128,0,0.26945066452026367
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,128,2,0,0.27026132742563885
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,128,32,0,0.2825813293457031
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,128,128,0,0.2711946765581767
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,128,64,0,1.6333813667297363
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,128,4,0,0.28963200251261395
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,128,2,0,0.5690773328145345
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,128,16,0,0.28221867481867474
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,128,8,0,0.28114134073257446
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,128,32,0,0.28733332951863605
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,128,1,0,0.9386346340179443
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,128,64,0,0.28803199529647827
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,128,128,0,0.27530133724212646
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,128,2,0,0.925605297088623
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,128,4,0,0.4788320064544678
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,128,16,0,0.2641333341598511
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,128,8,0,0.29126399755477905
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,128,1,0,2.142965316772461
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,128,128,0,0.9704373677571615
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,128,32,0,0.28837867577870685
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,128,64,0,0.2836266756057739
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,32,128,4,0,0.8879626592000326
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,32,128,2,0,2.109226703643799
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,32,128,32,0,0.2840213378270467
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,32,128,8,0,0.38602133591969806
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,32,128,16,0,1.4187733332316081
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,32,128,1,0,4.425584157307942
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,32,128,64,0,0.2846826712290446
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,32,128,128,0,0.26477332909901935
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,64,128,16,0,0.40092798074086505
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,64,128,64,0,0.3170880079269409
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,64,128,8,0,0.9028106530507406
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,64,128,4,0,2.0670293172200522
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,64,128,32,0,0.2849386731783549
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,64,128,2,0,4.3420000076293945
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,64,128,128,0,0.2598399917284648
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,64,128,1,0,8.955504099527994
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,128,128,16,0,0.946880022684733
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,128,128,32,0,0.43995734055836994
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,128,128,4,0,4.288895924886067
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,128,128,8,0,2.1014134089152017
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,128,128,64,0,0.2603093385696411
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,128,128,2,0,8.89744504292806
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,128,128,128,0,0.26316799720128375
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,128,128,1,0,18.008628845214844
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,256,128,16,0,2.175600051879883
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,256,128,8,0,4.351792017618815
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,256,128,32,0,1.007578690846761
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,256,128,4,0,8.67629305521647
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,256,1,0,0.2948480049769084
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,256,2,0,0.2695466677347819
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,256,4,0,0.2829866607983907
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,256,16,0,0.280458668867747
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,256,128,64,0,0.5095200141270956
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,256,8,0,0.268885334332784
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,256,128,128,0,0.2969706654548645
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,256,32,0,0.27777600288391113
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,256,64,0,0.2985919912656148
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,256,128,2,0,18.032394409179688
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,256,128,0,0.7700800100962321
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,256,4,0,0.2834773262341817
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,256,2,0,0.29078932603200275
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,256,8,0,0.2760000030199687
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,256,1,0,1.315834681193034
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,256,16,0,0.2789120078086853
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,256,128,0,0.278602659702301
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,256,64,0,0.28277866045633954
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,256,32,0,0.2940746744473775
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,256,2,0,0.3643733263015747
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,256,4,0,0.26369067033131915
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,256,8,0,0.26395734151204425
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,256,64,0,0.2879626750946045
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,256,16,0,0.9092106819152832
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,256,128,0,0.3556266625722249
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,256,1,0,1.0044586658477783
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,256,32,0,0.2833706736564636
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,256,2,0,0.9511520067850748
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,256,4,0,0.33319999774297077
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,256,1,0,2.1629172960917153
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,256,32,0,0.27711466948191327
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,256,128,1,0,36.191001892089844
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,256,16,0,1.9524319966634114
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,256,8,0,0.28754132986068726
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,256,64,0,0.27752532561620075
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,256,128,0,0.27305599053700763
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,256,16,0,0.2802880009015401
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,256,64,0,0.28381866216659546
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,256,8,0,0.36216533184051514
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,256,128,0,0.9554346402486166
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,256,32,0,0.2857866684595744
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,256,4,0,0.8940693537394205
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,256,1,0,4.390421231587728
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,256,2,0,2.1187413533528647
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,32,256,32,0,0.28171199560165405
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,32,256,16,0,0.37667731444040936
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,32,256,8,0,0.9136799971262614
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,32,256,4,0,2.0742185910542807
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,32,256,64,0,0.3036213318506877
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,32,256,128,0,0.24602667490641275
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,32,256,2,0,4.330122629801433
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,64,256,16,0,0.9519146283467611
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,64,256,32,0,0.44233067830403644
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,32,256,1,0,8.868725458780924
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,64,256,8,0,2.1107892990112305
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,64,256,4,0,4.298357327779134
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,64,256,64,0,0.2863573431968689
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,64,256,128,0,0.2598186731338501
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,64,256,2,0,8.787290573120117
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,128,256,16,0,2.1868106524149575
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,128,256,8,0,4.359898567199707
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,128,256,32,0,1.0289493401845295
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,64,256,1,0,18.047242482503254
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,128,256,64,0,0.5034933487574259
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,128,256,4,0,8.671162923177084
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,128,256,128,0,0.3030666708946228
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,128,256,2,0,18.01632563273112
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,256,256,16,0,4.5760800043741865
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,256,256,8,0,8.947760264078775
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,256,256,32,0,2.42032527923584
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,512,1,0,0.3742773135503133
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,512,2,0,0.28067733844121295
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,128,256,1,0,36.13853963216146
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,256,256,4,0,17.73812739054362
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,512,8,0,0.2792213360468547
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,256,256,64,0,1.2359200318654378
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,512,4,0,0.27956799666086835
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,256,256,128,0,0.679807980855306
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,512,16,0,0.28735466798146564
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,512,128,0,0.27034666140874225
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,512,32,0,0.28593599796295166
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,512,2,0,0.3753013213475545
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,512,64,0,0.28116800387700397
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,512,4,0,0.9926186402638754
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,512,1,0,0.955077330271403
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,512,8,0,0.28437334299087524
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,512,16,0,0.27974933385849
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,512,32,0,0.2826186617215474
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,512,64,0,0.28588799635569256
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,512,128,0,0.2674720088640849
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,512,16,0,0.28321067492167157
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,512,8,0,0.2640053431193034
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,512,4,0,0.3608959913253784
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,512,2,0,0.989578644434611
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,512,32,0,0.28835733731587726
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,512,64,0,0.2879839936892192
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,256,256,2,0,35.963706970214844
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,512,1,0,2.2032373746236167
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,512,128,0,0.2701813379923503
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,512,8,0,0.39671464761098224
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,512,32,0,0.28801600138346356
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,512,16,0,0.2791733344395955
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,512,4,0,0.903498649597168
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,512,2,0,2.109920024871826
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,512,1,0,4.428741455078125
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,512,64,0,0.32635732491811115
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,512,128,0,0.2625386714935303
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,512,32,0,0.2885599931081136
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,512,2,0,4.437109311421712
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,512,8,0,0.9513920148213705
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,512,16,0,0.4068586826324463
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,512,4,0,2.0749173164367676
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,512,64,0,0.28282666206359863
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,512,128,0,0.2581546703974406
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,512,1,0,8.89357884724935
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,32,512,16,0,0.9648106892903646
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,32,512,8,0,2.1213760375976562
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,32,512,4,0,4.299066543579102
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,32,512,32,0,0.4416586558024089
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,32,512,128,0,0.25748799244562787
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,32,512,64,0,0.28990934292475384
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,32,512,2,0,8.72111447652181
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,64,512,16,0,2.205952008565267
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,64,512,8,0,4.387525240580241
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,64,512,32,0,1.0321119626363118
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,64,512,4,0,8.72649065653483
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,64,512,64,0,0.516538659731547
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,32,512,1,0,17.99294916788737
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,256,256,1,0,72.78268941243489
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,64,512,128,0,0.31509333848953247
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,64,512,2,0,17.77352015177409
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,128,512,16,0,4.596901257832845
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,128,512,8,0,8.970149358113607
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,128,512,32,0,2.4192479451497397
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,128,512,128,0,0.6929919719696045
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,1024,2,0,0.3747733434041341
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,1024,4,0,0.2839413285255432
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,1024,1,0,0.9816586971282959
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,1024,8,0,0.2755519946416219
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,128,512,64,0,1.2491573492685955
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,1024,16,0,0.2774453361829122
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,1024,32,0,0.2617866595586141
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,128,512,4,0,17.789872487386067
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,64,512,1,0,36.17818705240885
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,1024,64,0,0.2855839927991231
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,1024,128,0,0.26157333453496295
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,1024,2,0,0.9586026668548584
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,1024,1,0,2.188506603240967
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,1024,4,0,0.38735465208689374
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,1024,16,0,0.2818293372790019
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,1024,8,0,0.2909333308537801
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,1024,32,0,0.28189865748087567
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,1024,64,0,1.2523787021636963
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,1024,128,0,0.2744426727294922
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,1024,8,0,0.4293919801712036
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,1024,4,0,0.9218239784240723
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,1024,16,0,0.28385599454243976
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,1024,2,0,2.1320212682088218
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,1024,32,0,0.27723199129104614
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,1024,64,0,0.2831786672274272
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,1024,128,0,0.2641706665356954
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,1024,1,0,4.370277404785156
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,128,512,2,0,36.3468271891276
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,1024,8,0,0.9475200176239014
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,1024,16,0,0.4268266757329305
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,1024,32,0,0.2899679938952128
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,1024,4,0,2.093066692352295
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,1024,128,0,0.26602667570114136
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,1024,64,0,0.285258670647939
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,1024,2,0,4.400005340576172
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,1024,1,0,8.969151814778646
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,1024,8,0,2.146538734436035
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,1024,16,0,0.9854400157928467
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,1024,32,0,0.46726401646931964
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,1024,64,0,0.29319467147191364
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,1024,4,0,4.34170659383138
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,1024,128,0,0.26154132684071857
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,1024,2,0,8.853802363077799
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,32,1024,16,0,2.227194627126058
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,32,1024,8,0,4.421413421630859
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,32,1024,32,0,1.058186690012614
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,32,1024,4,0,8.716490427652994
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,32,1024,64,0,0.5589226484298706
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,32,1024,128,0,0.34990934530893963
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,1024,1,0,17.926634470621746
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,32,1024,2,0,17.57077916463216
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,128,512,1,0,72.76525370279948
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,64,1024,8,0,9.00772794087728
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,64,1024,16,0,4.62497615814209
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,64,1024,32,0,2.4438613255818686
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,64,1024,64,0,1.2756000359853108
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,64,1024,128,0,0.7322666645050049
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,64,1024,4,0,17.84424591064453
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,1536,2,0,0.7181013425191244
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,1536,1,0,1.5865333875020344
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,1536,4,0,0.3272533416748047
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,1536,16,0,0.2845226724942525
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,1536,32,0,0.27724266052246094
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,1536,8,0,0.2831733425458272
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,1536,64,0,0.2618880073229472
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,1536,128,0,0.24302399158477783
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,32,1024,1,0,36.070597330729164
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,1536,8,0,0.3619360129038493
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,1536,4,0,1.3607840538024902
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,1536,16,0,0.28651199738184613
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,1536,32,0,0.273637334505717
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,1536,1,0,3.391183853149414
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,1536,2,0,1.5880799293518066
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,1536,64,0,0.278330663839976
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,1536,128,0,0.28654932975769043
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,1536,8,0,0.7148586908976237
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,1536,16,0,0.8612746397654215
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,64,1024,2,0,36.21690114339193
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,1536,4,0,1.480288028717041
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,1536,32,0,0.2824479937553406
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,1536,64,0,0.2807199954986572
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,1536,128,0,0.2696373263994853
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,1536,2,0,3.4428799947102866
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,1536,1,0,6.536954879760742
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,1536,4,0,3.2421013514200845
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,1536,32,0,0.4292159875233968
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,1536,16,0,0.7440480391184489
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,1536,8,0,1.5746347109476726
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,1536,128,0,0.27002666393915814
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,1536,2,0,6.5053863525390625
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,1536,64,0,0.2823786735534668
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,1536,16,0,1.7137120564778645
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,1536,8,0,3.317824045817057
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,1536,32,0,0.8309600353240967
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,1536,1,0,13.363812764485678
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,1536,4,0,6.542826970418294
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,1536,64,0,0.41686399777730304
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,1536,128,0,0.284277339776357
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,1536,2,0,13.237930297851562
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,32,1536,8,0,6.747141520182292
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,32,1536,16,0,3.483743985493978
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,32,1536,32,0,1.8351200421651204
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,32,1536,128,0,0.561408003171285
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,32,1536,64,0,0.938373327255249
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,32,1536,4,0,13.277498881022135
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,64,1024,1,0,72.71254475911458
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,2048,8,0,0.28170132637023926
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,2048,4,0,0.47274665037790936
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,1536,1,0,27.109291076660156
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,2048,2,0,1.0724746386210124
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,2048,1,0,2.296991984049479
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,2048,16,0,0.8141280015309652
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,2048,32,0,0.2860479950904846
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,2048,64,0,0.27774399518966675
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,2048,128,0,0.2789173324902852
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,2048,8,0,0.5171093146006266
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,2048,4,0,1.0000373522440593
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,2048,2,0,2.2210559844970703
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,2048,16,0,0.2998560070991516
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,2048,32,0,0.2828800082206726
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,32,1536,2,0,26.40649668375651
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,2048,64,0,0.29125332832336426
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,2048,128,0,0.269978662331899
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,2048,1,0,4.679242769877116
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,2048,32,0,0.3089546759923299
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,2048,16,0,0.5543893178304037
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,2048,4,0,2.165829340616862
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,2048,8,0,1.0112160046895344
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,2048,2,0,4.488197326660156
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,2048,64,0,0.28300267457962036
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,2048,128,0,0.24759999910990396
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,2048,1,0,8.960245132446289
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,2048,16,0,1.0832959810892742
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,2048,8,0,2.198847929636637
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,2048,32,0,0.5775733391443888
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,2048,4,0,4.42523193359375
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,2048,64,0,0.34090133508046466
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,2048,128,0,0.2699733376502991
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,2048,2,0,8.935487747192383
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,2048,16,0,2.2904586791992188
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,2048,8,0,4.502314567565918
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,2048,32,0,1.128432035446167
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,2048,64,0,0.6472426652908325
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,2048,4,0,8.82479985555013
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,2048,1,0,17.928106943766277
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,2048,128,0,0.42309868335723877
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,32,1536,1,0,54.4754892985026
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,2048,2,0,18.11846923828125
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,32,2048,16,0,4.705600102742513
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,32,2048,8,0,9.131914774576822
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,32,2048,128,0,0.8482133547465006
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,32,2048,32,0,2.514693260192871
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,32,2048,64,0,1.4070879618326824
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,3072,8,0,0.5039573510487875
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,3072,2,0,1.8646133740743
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,3072,4,0,0.9130720297495524
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,3072,1,0,3.9270826975504556
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,3072,16,0,0.2991786599159241
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,3072,32,0,0.2853386600812276
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,32,2048,4,0,17.930506388346355
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,3072,64,0,0.28936533133188885
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,3072,128,0,0.25918400287628174
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,2048,1,0,35.74788157145182
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,3072,8,0,0.9286080201466879
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,3072,16,0,0.5233813524246216
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,3072,4,0,1.8656479517618816
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,3072,2,0,3.888885180155436
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,3072,32,0,0.3033813238143921
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,3072,64,0,0.2900159955024719
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,3072,128,0,0.27298132578531903
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,3072,1,0,7.717295964558919
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,3072,8,0,1.893957297007243
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,3072,4,0,3.8443520863850913
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,3072,16,0,0.9606239795684814
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,3072,32,0,0.5377066532770792
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,32,2048,2,0,35.73204803466797
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,3072,64,0,0.3071413238843282
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,3072,128,0,0.2722986737887065
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,3072,2,0,7.694175720214844
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,3072,16,0,1.96451203028361
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,3072,8,0,3.898122787475586
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,3072,32,0,1.015605370203654
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,3072,64,0,0.5672906637191772
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,3072,1,0,15.352656046549479
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,3072,4,0,7.685050964355469
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,3072,128,0,0.3744959831237793
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,3072,2,0,15.364885965983072
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,3072,32,0,2.0622666676839194
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,3072,8,0,7.822357177734375
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,3072,16,0,4.1622772216796875
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,3072,64,0,1.1314506530761719
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,3072,128,0,0.6976586977640787
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,3072,4,0,15.439467112223307
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,4096,4,0,1.3832160631815593
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,4096,2,0,2.9309921264648438
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,4096,8,0,0.7667520046234131
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,4096,16,0,0.42508800824483234
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,4096,32,0,0.27552000681559247
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,3072,1,0,31.23748270670573
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,4096,64,0,0.29717334111531574
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,4096,1,0,5.856538772583008
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,4096,128,0,0.29210132360458374
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,32,2048,1,0,73.03661092122395
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,4096,8,0,1.4000159899393718
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,4096,16,0,0.7601919968922933
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,4096,4,0,2.8718347549438477
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,4096,64,0,0.3030239939689636
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,4096,32,0,0.42981334527333576
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,4096,128,0,0.26580266157786053
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,4096,2,0,5.868693033854167
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,3072,2,0,30.832234700520832
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,4096,8,0,2.9123360315958657
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,4096,16,0,1.434186617533366
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,4096,1,0,11.686271667480469
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,4096,4,0,5.797136306762695
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,4096,32,0,0.7994559605916342
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,4096,128,0,0.29260265827178955
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,4096,64,0,0.4789546728134155
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,4096,2,0,11.730672200520834
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,4096,16,0,3.0102612177530923
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,4096,8,0,5.857418696085612
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,4096,32,0,1.5058879852294922
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,4096,64,0,0.8885173002878824
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,4096,128,0,0.5658666690190634
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,4096,4,0,11.622586568196615
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,4096,1,0,23.466506958007812
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,3072,1,0,63.298787434895836
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,4096,2,0,23.328447977701824
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,16,4096,64,0,1.723962624867757
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,16,4096,32,0,3.3588266372680664
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,16,4096,16,0,6.056506474812825
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,16,4096,128,0,1.0651893615722656
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,16,4096,8,0,11.938682556152344
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,6144,8,0,1.386090596516927
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,6144,2,0,5.465221405029297
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,6144,4,0,2.672602653503418
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,6144,16,0,0.7874720096588135
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,6144,32,0,0.4844906727472941
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,6144,128,0,0.3491946856180827
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,6144,64,0,0.36511464913686115
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,16,4096,4,0,23.704004923502605
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,6144,1,0,11.04543940226237
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,6144,16,0,1.4238026936848958
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,6144,8,0,2.7408854166666665
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,6144,4,0,5.409178415934245
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,6144,32,0,0.8281013170878092
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,6144,64,0,0.4931573470433553
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,4096,1,0,48.66199239095052
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,6144,128,0,0.368831992149353
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,6144,2,0,11.107402801513672
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,6144,8,0,5.471802393595378
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,6144,16,0,2.7504428227742515
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,6144,32,0,1.5099306106567383
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,6144,64,0,0.8341173330942789
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,16,4096,2,0,48.1339111328125
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,6144,128,0,0.5472106536229452
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,6144,4,0,10.831461588541666
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,6144,1,0,21.905131022135418
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,6144,2,0,22.5122553507487
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,6144,8,0,11.127184549967447
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,6144,16,0,5.665258407592773
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,6144,32,0,2.951823870340983
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,6144,64,0,1.6222507158915203
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,6144,128,0,0.9728373686472574
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,6144,4,0,22.28991444905599
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,8192,8,0,2.1936160723368325
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,8192,4,0,4.460538546244304
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,8192,16,0,1.2822240193684895
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,8192,2,0,9.006538391113281
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,8192,32,0,0.7179520130157471
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,6144,1,0,45.18037414550781
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,8192,64,0,0.4987200101216634
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,8192,128,0,0.4490186770757039
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,8192,1,0,18.174293518066406
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,8192,8,0,4.333466529846191
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,8192,16,0,2.2804853121439614
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,6144,2,0,45.77104695638021
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,8192,32,0,1.2978133360544841
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,8192,4,0,8.777653376261393
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,16,4096,1,0,96.3309834798177
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,8192,64,0,0.8128053347269694
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,8192,128,0,0.47808531920115155
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,8192,2,0,17.58669916788737
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,8192,16,0,4.421861330668132
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,8192,32,0,2.425408045450846
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,8192,64,0,1.369226614634196
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,8192,128,0,0.8710453510284424
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,8192,8,0,8.78379758199056
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,8192,4,0,17.922810872395832
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,8192,1,0,35.521339416503906
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,8,8192,16,0,9.110597610473633
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,8192,2,0,35.630592346191406
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,8,8192,8,0,17.93890126546224
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,8,8192,32,0,4.710224151611328
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,8,8192,64,0,2.5737172762552896
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,8,8192,128,0,1.5722613334655762
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,6144,1,0,91.59529622395833
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,10240,8,0,3.377493222554525
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,10240,4,0,6.533440272013347
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,8,8192,4,0,36.80179087320963
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,10240,16,0,1.788101355234782
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,10240,32,0,1.0982720057169597
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,10240,64,0,0.6376320123672485
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,10240,128,0,0.6138773361841837
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,10240,2,0,13.310885111490885
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,10240,8,0,6.4947255452473955
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,10240,1,0,26.367284138997395
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,10240,4,0,13.330554962158203
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,8192,1,0,73.89541625976562
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,10240,16,0,3.3472798665364585
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,10240,32,0,1.9005386034647624
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,10240,64,0,1.1817333698272705
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,10240,128,0,0.6794293721516927
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,10240,2,0,26.404693603515625
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,8,8192,2,0,73.76065572102864
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,10240,8,0,13.273066202799479
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,10240,16,0,6.611061096191406
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,10240,32,0,3.379701296488444
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,10240,64,0,1.902250607808431
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,10240,128,0,1.1817333698272705
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,10240,4,0,26.94005839029948
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,10240,1,0,53.85421244303385
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,12288,8,0,4.448922793070476
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,12288,16,0,2.521786689758301
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,12288,32,0,1.4138773282368977
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,12288,64,0,0.8467199802398682
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,12288,4,0,8.916629155476889
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,12288,128,0,0.6593173344930013
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,12288,2,0,18.375381469726562
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,10240,2,0,53.95589701334635
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,12288,8,0,9.232837041219076
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,12288,32,0,2.5125385920206704
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,12288,16,0,4.701829274495442
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,12288,4,0,18.27782440185547
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,12288,64,0,1.4350080490112305
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,12288,1,0,37.09795125325521
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,12288,128,0,0.9299306869506836
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,12288,2,0,37.26519012451172
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,12288,8,0,18.42644755045573
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,8,8192,1,0,151.66130574544272
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,12288,16,0,9.196538925170898
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,12288,64,0,2.8352320988972983
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,12288,32,0,4.6957651774088545
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,12288,128,0,1.5716800689697266
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,12288,4,0,37.000475565592446
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,10240,1,0,110.15798950195312
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,16384,4,0,15.465775807698568
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,16384,8,0,7.681397120157878
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,16384,32,0,2.230255921681722
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,16384,64,0,1.3701012929280598
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,12288,1,0,76.26701354980469
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,16384,128,0,0.8627999623616537
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,16384,16,0,3.9536479314168296
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,16384,2,0,31.523630777994793
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,16384,16,0,7.744122823079427
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,16384,8,0,15.693717956542969
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,16384,32,0,4.0692799886067705
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,12288,2,0,75.27730305989583
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,16384,64,0,2.2702666918436685
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,16384,128,0,1.6524373690287273
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,16384,4,0,31.388506571451824
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,16384,1,0,63.21846008300781
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,16384,2,0,62.604695638020836
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,4,16384,32,0,8.087285359700521
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,4,16384,8,0,31.75664520263672
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,4,16384,16,0,15.917743682861328
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,4,16384,128,0,2.5607786178588867
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,4,16384,64,0,4.174490610758464
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,4,16384,4,0,62.85133361816406
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,12288,1,0,151.39996337890625
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,1,32768,16,0,14.349839528401693
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,1,32768,32,0,7.273152033487956
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,1,32768,8,0,29.363680521647137
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,16384,1,0,127.0249532063802
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,1,32768,64,0,4.196181297302246
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,1,32768,128,0,2.5755467414855957
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,1,32768,4,0,58.58782450358073
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,4,16384,2,0,128.07056681315103
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,8,2,32768,16,0,29.380452473958332
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,1,32768,2,0,116.11245727539062
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,16,2,32768,8,0,58.715413411458336
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,4,2,32768,32,0,14.728688557942709
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,1,2,32768,128,0,4.294997215270996
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,2,2,32768,64,0,7.481583913167317
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,32,2,32768,4,0,116.67477416992188
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,4,16384,1,0,255.3426717122396
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,1,32768,1,0,233.64359537760416
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,64,2,32768,2,0,233.05326334635416
VLLM,0.12.0,NVIDIA L40S,context_mla,vllm_triton_mla,float16,float16,128,2,32768,1,0,466.3306477864583
