framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,1,1,0,0.012668800354003907
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,1,4,0,0.012652799487113953
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,1,2,0,0.012729600071907043
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,1,16,0,0.012604799866676331
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,1,32,0,0.012800000607967377
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,1,8,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,1,64,0,0.012750400602817536
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,1,1,0,0.018648000061511995
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,1,2,0,0.018760000169277192
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,1,8,0,0.01887679994106293
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,1,4,0,0.018825599551200868
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,1,16,0,0.018904000520706177
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,1,32,0,0.01884640008211136
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,1,64,0,0.019049599766731262
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,16,4,0,0.014897599816322327
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,16,8,0,0.014956800639629364
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,16,2,0,0.014827199280261993
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,16,1,0,0.01541759967803955
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,16,16,0,0.014667199552059173
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,16,32,0,0.014787200093269347
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,16,64,0,0.012796799838542938
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,16,2,0,0.021004800498485566
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,16,4,0,0.02098720073699951
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,16,8,0,0.02090719938278198
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,16,1,0,0.021220800280570985
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,16,16,0,0.022947199642658234
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,16,32,0,0.023027199506759643
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,16,64,0,0.020921599864959717
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,32,2,0,0.014860799908638
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,32,4,0,0.014953599870204925
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,32,8,0,0.014694400131702423
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,32,1,0,0.016857600212097167
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,32,16,0,0.014711999893188476
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,32,32,0,0.01467359960079193
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,32,64,0,0.01281760036945343
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,32,2,0,0.021668800711631776
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,32,4,0,0.02112639993429184
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,32,1,0,0.0210207998752594
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,32,8,0,0.02093600034713745
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,32,16,0,0.021009600162506102
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,32,32,0,0.02140959948301315
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,64,1,0,0.016788800060749055
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,32,64,0,0.02085919976234436
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,64,2,0,0.01669439971446991
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,64,4,0,0.014825600385665893
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,64,8,0,0.014740799367427827
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,64,32,0,0.014902399480342865
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,64,64,0,0.01496479958295822
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,64,16,0,0.01496800035238266
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,64,1,0,0.02503199875354767
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,64,2,0,0.02303680032491684
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,64,4,0,0.02294880002737045
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,64,16,0,0.021334399282932282
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,64,8,0,0.02112639993429184
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,64,32,0,0.020942400395870208
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,64,64,0,0.02093279957771301
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,128,1,0,0.016884799301624297
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,128,4,0,0.016920000314712524
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,128,8,0,0.01679680049419403
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,128,2,0,0.017025600373744964
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,128,16,0,0.014828799664974213
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,128,32,0,0.016828800737857818
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,128,64,0,0.01682559996843338
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,128,2,0,0.02518559992313385
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,128,4,0,0.022835199534893037
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,128,1,0,0.02715040147304535
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,128,8,0,0.023027199506759643
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,128,16,0,0.023022399842739107
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,128,32,0,0.022804799675941467
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,256,1,0,0.025155198574066163
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,256,2,0,0.021009600162506102
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,128,64,0,0.02105119973421097
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,256,4,0,0.018995200097560883
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,256,8,0,0.019012799859046935
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,256,16,0,0.018908800184726716
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,256,64,0,0.018873600661754607
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,256,32,0,0.01884479969739914
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,256,1,0,0.03319360017776489
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,256,2,0,0.0271263986825943
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,256,4,0,0.02715519964694977
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,256,16,0,0.02701919972896576
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,256,32,0,0.024979199469089507
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,256,8,0,0.025551998615264894
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,256,64,0,0.024984000623226164
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,512,1,0,0.03520799875259399
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,512,2,0,0.02913439869880676
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,512,8,0,0.022867199778556824
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,512,16,0,0.02216479927301407
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,512,4,0,0.023105600476264955
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,512,32,0,0.02102559953927994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,512,64,0,0.021052800118923187
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,512,1,0,0.04339359998703003
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,512,4,0,0.029212799668312073
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,512,8,0,0.029284799098968507
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,512,2,0,0.035446399450302125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,512,16,0,0.02924480140209198
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,512,32,0,0.029128000140190125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,512,64,0,0.02719680070877075
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,1024,2,0,0.03954240083694458
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,1024,1,0,0.062009602785110474
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,1024,4,0,0.033297601342201236
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,1024,8,0,0.031246399879455565
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,1024,16,0,0.029300799965858458
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,1024,32,0,0.02922239899635315
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,1024,64,0,0.028595200181007384
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,1024,1,0,0.07431520223617553
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,1024,2,0,0.04770880043506622
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,1024,8,0,0.035385599732398985
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,1024,4,0,0.03966720104217529
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,1024,16,0,0.03517119884490967
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,1024,32,0,0.03312479853630066
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,1024,64,0,0.0332399994134903
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,1536,1,0,0.09557120203971863
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,1536,2,0,0.060838401317596436
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,1536,4,0,0.043424001336097716
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,1536,8,0,0.037566399574279784
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,1536,32,0,0.03519839942455292
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,1536,64,0,0.033220800757408145
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,1536,16,0,0.03620480000972748
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,1536,1,0,0.10876640081405639
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,1536,2,0,0.06854720115661621
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,1536,4,0,0.04951359927654266
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,1536,16,0,0.03945440053939819
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,1536,8,0,0.04159359931945801
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,1536,32,0,0.03936800062656402
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,1536,64,0,0.03745439946651459
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,2048,1,0,0.13345279693603515
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,2048,2,0,0.0805296003818512
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,2048,4,0,0.05380319952964783
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,2048,8,0,0.048089599609375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,2048,16,0,0.043617600202560426
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,2048,64,0,0.041529598832130435
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,2048,1,0,0.14599679708480834
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,2048,32,0,0.04321599900722504
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,2048,2,0,0.08856639862060547
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,2048,4,0,0.058006399869918825
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,2048,8,0,0.04954879879951477
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,2048,32,0,0.04515359997749328
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,2048,16,0,0.0457152009010315
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,2048,64,0,0.04351840019226074
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,3072,1,0,0.22393279075622557
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,3072,2,0,0.12971999645233154
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,3072,4,0,0.08248800039291382
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,3072,8,0,0.06403679847717285
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,3072,16,0,0.05793600082397461
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,3072,32,0,0.05576000213623047
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,3072,64,0,0.05374559760093689
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,3072,1,0,0.23029119968414308
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,3072,2,0,0.13565759658813475
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,3072,4,0,0.08660640120506287
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,3072,16,0,0.05804479718208313
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,3072,32,0,0.0556768000125885
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,3072,8,0,0.06416320204734802
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,3072,64,0,0.05376960039138794
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,4096,2,0,0.1911903977394104
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,4096,1,0,0.3394576072692871
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,4096,8,0,0.07848479747772216
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,4096,4,0,0.11729120016098023
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,4096,16,0,0.0721679985523224
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,4096,32,0,0.06992959976196289
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,4096,64,0,0.06825119853019715
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,4096,1,0,0.32743520736694337
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,4096,2,0,0.18789440393447876
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,4096,8,0,0.0794272005558014
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,4096,4,0,0.11904000043869019
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,4096,16,0,0.07021920084953308
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,4096,32,0,0.06792160272598266
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,4096,64,0,0.0639631986618042
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,6144,1,0,0.6342031955718994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,6144,8,0,0.12697600126266478
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,6144,4,0,0.2007551908493042
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,6144,2,0,0.3404783964157104
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,6144,16,0,0.10292799472808838
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,6144,32,0,0.09879519939422607
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,6144,64,0,0.09478880167007446
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,6144,2,0,0.31607999801635744
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,6144,1,0,0.5628880023956299
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,6144,4,0,0.1857200026512146
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,6144,8,0,0.12156480550765991
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,6144,16,0,0.09485759735107421
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,6144,32,0,0.08862079977989197
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,6144,64,0,0.08655359745025634
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,8192,1,0,0.9914112091064453
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,8192,2,0,0.547214412689209
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,8192,8,0,0.19112000465393067
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,8192,16,0,0.1316383957862854
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,8192,4,0,0.30454719066619873
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,8192,32,0,0.12541760206222535
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,8192,64,0,0.12123359441757202
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,8192,1,0,0.8568767547607422
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,8192,2,0,0.4694608211517334
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,8192,4,0,0.2743247985839844
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,8192,8,0,0.1728943943977356
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,8192,16,0,0.11740959882736206
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,8192,64,0,0.10686719417572021
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,8192,32,0,0.11145600080490112
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,10240,1,0,1.4245920181274414
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,10240,2,0,0.7736815929412841
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,10240,4,0,0.43922719955444334
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,10240,8,0,0.26386559009552
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,10240,16,0,0.16331039667129515
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,10240,32,0,0.15284639596939087
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,10240,64,0,0.14829920530319213
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,10240,1,0,1.1995471954345702
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,10240,2,0,0.6541679859161377
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,10240,4,0,0.3720319986343384
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,10240,8,0,0.23151040077209473
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,10240,32,0,0.1329584002494812
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,10240,16,0,0.1483183979988098
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,10240,64,0,0.12753920555114745
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,12288,1,0,1.9447456359863282
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,12288,2,0,1.0503600120544434
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,12288,8,0,0.3478384017944336
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,12288,4,0,0.5755536079406738
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,12288,16,0,0.21559839248657225
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,12288,32,0,0.17916159629821776
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,12288,64,0,0.17473759651184081
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,12288,1,0,1.602299118041992
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,12288,4,0,0.48273282051086425
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,12288,2,0,0.8612799644470215
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,12288,8,0,0.2914655923843384
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,12288,16,0,0.19151519536972045
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,12288,64,0,0.1479423999786377
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,12288,32,0,0.1561247944831848
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,16384,1,0,3.4406944274902345
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,16384,8,0,0.5388304233551026
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,16384,4,0,0.9300368309020997
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,16384,2,0,1.6975519180297851
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,16384,16,0,0.34211680889129636
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,16384,32,0,0.2384511947631836
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,16384,64,0,0.2298815965652466
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,16384,2,0,1.374715232849121
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,16384,1,0,2.569563293457031
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,16384,8,0,0.4368752002716064
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,16384,4,0,0.7523231983184815
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,16384,16,0,0.2858736038208008
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,16384,64,0,0.19103519916534423
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,16384,32,0,0.19921280145645143
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,32768,4,0,3.17083683013916
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,32768,8,0,1.710772705078125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,32768,1,0,12.690638732910156
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,32768,2,0,6.633884429931641
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,32768,16,0,0.9852255821228028
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,1,32768,64,0,0.4582496166229248
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,32768,32,0,0.6169263839721679
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,32768,1,0,9.168350219726562
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,32768,4,0,2.36389274597168
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,32768,2,0,4.54627685546875
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,32768,8,0,1.3024239540100098
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,32768,16,0,0.7744319915771485
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,32768,32,0,0.49538559913635255
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,1,32768,64,0,0.35537118911743165
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,1,1,0,0.01684480011463165
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,1,2,0,0.014902399480342865
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,1,4,0,0.013105599582195282
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,1,8,0,0.014742399752140044
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,1,32,0,0.014630399644374847
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,1,64,0,0.012676799297332763
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,1,1,0,0.02294880002737045
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,1,16,0,0.01284479945898056
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,1,2,0,0.023043200373649597
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,1,4,0,0.021188800036907197
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,1,8,0,0.020974400639533996
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,1,16,0,0.021048000454902648
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,1,32,0,0.023056000471115112
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,16,1,0,0.015371200442314149
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,1,64,0,0.018929600715637207
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,16,4,0,0.014776000380516052
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,16,2,0,0.014851200580596923
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,16,8,0,0.014593599736690522
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,16,16,0,0.014776000380516052
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,16,32,0,0.014894400537014008
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,16,1,0,0.024732799828052522
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,16,2,0,0.020926399528980254
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,16,64,0,0.014875200390815736
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,16,4,0,0.023028799891471864
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,16,8,0,0.02096959948539734
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,16,16,0,0.02099200040102005
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,16,32,0,0.019072000682353974
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,16,64,0,0.02099999934434891
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,32,1,0,0.016841599345207216
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,32,2,0,0.016735999286174773
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,32,4,0,0.014902399480342865
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,32,8,0,0.015052799880504609
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,32,16,0,0.014856000244617463
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,32,32,0,0.014880000054836274
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,32,64,0,0.014790399372577668
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,32,1,0,0.025110399723052977
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,32,2,0,0.022891199588775633
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,32,4,0,0.02095839977264404
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,32,8,0,0.02123039960861206
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,32,32,0,0.02300959974527359
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,32,16,0,0.02096959948539734
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,32,64,0,0.021593600511550903
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,64,1,0,0.018918399512767792
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,64,2,0,0.016891199350357055
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,64,4,0,0.016710400581359863
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,64,8,0,0.014843200147151948
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,64,16,0,0.014881600439548493
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,64,32,0,0.0146479994058609
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,64,64,0,0.014747199416160584
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,64,1,0,0.027020800113677978
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,64,4,0,0.023094399273395537
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,64,2,0,0.022947199642658234
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,64,8,0,0.02301599979400635
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,64,16,0,0.022694399952888487
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,64,32,0,0.023038400709629057
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,128,1,0,0.021001599729061127
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,64,64,0,0.021171200275421142
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,128,2,0,0.018995200097560883
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,128,4,0,0.016841599345207216
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,128,8,0,0.014726400375366211
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,128,16,0,0.017008000612258913
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,128,32,0,0.016777600347995757
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,128,64,0,0.014886400103569031
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,128,1,0,0.03125439882278443
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,128,2,0,0.026748800277709962
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,128,4,0,0.024956800043582916
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,128,8,0,0.024881599843502043
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,128,16,0,0.023049600422382355
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,128,32,0,0.02298080027103424
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,128,64,0,0.023030400276184082
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,256,1,0,0.031241598725318908
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,256,2,0,0.024929599463939668
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,256,4,0,0.02227199971675873
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,256,8,0,0.01906079947948456
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,256,16,0,0.01897439956665039
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,256,32,0,0.01892160028219223
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,256,64,0,0.01719679981470108
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,256,1,0,0.041443198919296265
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,256,2,0,0.03332479894161224
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,256,4,0,0.028966400027275085
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,256,8,0,0.0272816002368927
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,256,16,0,0.025139200687408447
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,256,32,0,0.025174400210380553
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,256,64,0,0.025099200010299683
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,512,1,0,0.054020798206329344
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,512,2,0,0.034929600358009336
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,512,4,0,0.02730880081653595
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,512,8,0,0.024988800287246704
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,512,16,0,0.023004800081253052
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,512,32,0,0.02280000001192093
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,512,64,0,0.022889600694179536
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,512,1,0,0.06612319946289062
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,512,4,0,0.03537440001964569
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,512,2,0,0.0435263991355896
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,512,8,0,0.0312608003616333
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,512,16,0,0.031115201115608216
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,512,32,0,0.029068800806999206
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,1024,1,0,0.10718879699707032
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,1024,2,0,0.06325439810752868
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,512,64,0,0.029180800914764403
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,1024,4,0,0.04008319973945618
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,1024,8,0,0.03531199991703034
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,1024,16,0,0.03126559853553772
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,1024,32,0,0.03118079900741577
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,1024,64,0,0.02914400100708008
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,1024,1,0,0.12387200593948364
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,1024,2,0,0.07416480183601379
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,1024,4,0,0.049527999758720395
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,1024,8,0,0.04331200122833252
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,1024,16,0,0.037376001477241516
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,1024,32,0,0.035132798552513125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,1024,64,0,0.035281598567962646
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,1536,1,0,0.16730719804763794
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,1536,2,0,0.09808480143547058
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,1536,4,0,0.06392319798469544
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,1536,8,0,0.04556480050086975
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,1536,16,0,0.04134239852428436
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,1536,32,0,0.037457600235939026
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,1536,64,0,0.0372655987739563
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,1536,1,0,0.18861759901046754
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,1536,2,0,0.10988960266113282
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,1536,4,0,0.07225599884986877
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,1536,8,0,0.05153759717941284
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,1536,16,0,0.045742401480674745
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,1536,32,0,0.04328159987926483
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,1536,64,0,0.04141440093517303
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,2048,1,0,0.2370784044265747
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,2048,2,0,0.13425439596176147
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,2048,4,0,0.08444479703903199
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,2048,16,0,0.04965119957923889
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,2048,8,0,0.05586720108985901
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,2048,32,0,0.045552000403404236
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,2048,64,0,0.045484799146652224
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,2048,1,0,0.25735518932342527
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,2048,2,0,0.1481824040412903
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,2048,4,0,0.09226400256156922
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,2048,8,0,0.061908799409866336
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,2048,16,0,0.053844797611236575
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,2048,32,0,0.049670401215553286
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,2048,64,0,0.04754559993743897
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,3072,1,0,0.4067200183868408
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,3072,2,0,0.22934238910675048
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,3072,4,0,0.13523839712142943
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,3072,16,0,0.06600319743156433
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,3072,8,0,0.08659999966621398
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,3072,32,0,0.0618287980556488
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,3072,64,0,0.060012799501419065
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,3072,1,0,0.4167312145233154
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,3072,4,0,0.13923360109329225
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,3072,2,0,0.23435840606689454
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,3072,8,0,0.09259200096130371
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,3072,16,0,0.07023839950561524
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,3072,32,0,0.06213759779930115
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,3072,64,0,0.06018720269203186
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,4096,1,0,0.6197135925292969
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,4096,2,0,0.34207680225372317
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,4096,4,0,0.19733279943466187
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,4096,8,0,0.1244047999382019
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,4096,16,0,0.08450400233268737
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,4096,32,0,0.07824479937553405
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,4096,64,0,0.07624959945678711
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,4096,1,0,0.6045728206634522
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,4096,2,0,0.3344144105911255
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,4096,4,0,0.19527039527893067
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,4096,8,0,0.12514879703521728
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,4096,16,0,0.08545119762420654
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,4096,32,0,0.07635679841041565
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,4096,64,0,0.07215039730072022
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,6144,1,0,1.1648768424987792
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,6144,2,0,0.6426608085632324
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,6144,4,0,0.34951999187469485
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,6144,8,0,0.2108367919921875
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,6144,16,0,0.135863995552063
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,6144,32,0,0.10997439622879028
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,6144,64,0,0.10667200088500976
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,6144,1,0,1.0625215530395509
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,6144,4,0,0.3263855934143066
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,6144,2,0,0.5719935894012451
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,6144,8,0,0.19716639518737794
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,6144,16,0,0.13159040212631226
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,6144,32,0,0.10529760122299195
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,6144,64,0,0.09890239834785461
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,8192,1,0,1.8734399795532226
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,8192,2,0,1.0037952423095704
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,8192,4,0,0.5592832088470459
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,8192,8,0,0.31579039096832273
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,8192,16,0,0.20332000255584717
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,8192,32,0,0.14209920167922974
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,8192,64,0,0.1359935998916626
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,8192,1,0,1.6314384460449218
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,8192,2,0,0.8681695938110352
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,8192,4,0,0.4842207908630371
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,8192,8,0,0.28758559226989744
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,8192,32,0,0.1317952036857605
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,8192,64,0,0.1253600001335144
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,8192,16,0,0.18899519443511964
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,10240,1,0,2.8994815826416014
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,10240,4,0,0.7893504142761231
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,10240,8,0,0.4468048095703125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,10240,2,0,1.4405792236328125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,10240,16,0,0.28326079845428465
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,10240,32,0,0.17889120578765869
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,10240,64,0,0.1665168046951294
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,10240,1,0,2.310840034484863
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,10240,2,0,1.2299887657165527
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,10240,4,0,0.6712656021118164
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,10240,8,0,0.387007999420166
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,10240,16,0,0.24996960163116455
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,10240,32,0,0.1685696005821228
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,10240,64,0,0.14984960556030275
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,12288,2,0,1.9648704528808594
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,12288,1,0,3.778518295288086
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,12288,8,0,0.6003903865814209
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,12288,4,0,1.0909279823303222
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,12288,16,0,0.3605072021484375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,12288,32,0,0.23417119979858397
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,12288,64,0,0.19716320037841797
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,12288,1,0,3.1421552658081056
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,12288,2,0,1.621980857849121
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,12288,4,0,0.8820832252502442
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,12288,8,0,0.5078239917755127
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,12288,16,0,0.3117664098739624
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,12288,32,0,0.21510560512542726
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,12288,64,0,0.174510395526886
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,16384,4,0,1.7326255798339845
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,16384,1,0,6.664580535888672
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,16384,2,0,3.457555389404297
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,16384,8,0,0.951535987854004
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,16384,16,0,0.5574336051940918
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,16384,64,0,0.2636415958404541
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,16384,32,0,0.36143519878387453
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,16384,1,0,5.071068954467774
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,16384,8,0,0.7806591987609863
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,16384,4,0,1.4058608055114745
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,16384,2,0,2.599291229248047
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,16384,16,0,0.4678512096405029
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,16384,32,0,0.3138688087463379
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,16384,64,0,0.22710399627685546
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,32768,8,0,3.3448863983154298
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,32768,4,0,6.396782302856446
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,32768,2,0,12.784368133544922
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,32768,16,0,1.8124591827392578
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,32768,32,0,1.040384006500244
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,32768,1,0,25.805853271484374
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,2,32768,64,0,0.6694096088409424
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,32768,8,0,2.4106447219848635
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,32768,4,0,4.624844741821289
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,32768,2,0,9.14449462890625
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,32768,1,0,18.272813415527345
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,32768,32,0,0.8250016212463379
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,32768,16,0,1.3599231719970704
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,1,1,0,0.021492800116539
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,1,4,0,0.014403200149536133
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,2,32768,64,0,0.5521056175231933
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,1,2,0,0.014897599816322327
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,1,8,0,0.014852799475193024
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,1,16,0,0.012956799566745758
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,1,32,0,0.014860799908638
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,1,64,0,0.012772800028324127
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,1,1,0,0.025095999240875244
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,1,2,0,0.023022399842739107
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,1,4,0,0.02298399955034256
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,1,8,0,0.02119999974966049
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,1,32,0,0.020983999967575072
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,1,64,0,0.022864000499248506
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,16,1,0,0.018964800238609313
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,1,16,0,0.02298080027103424
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,16,4,0,0.016777600347995757
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,16,8,0,0.014787200093269347
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,16,2,0,0.01485760062932968
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,16,16,0,0.014812800288200378
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,16,32,0,0.014843200147151948
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,16,64,0,0.014876799285411834
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,16,2,0,0.023099200427532197
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,16,1,0,0.02699199914932251
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,16,4,0,0.02101600021123886
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,16,8,0,0.023004800081253052
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,16,16,0,0.02094399929046631
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,16,32,0,0.020870399475097657
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,16,64,0,0.02285439968109131
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,32,1,0,0.021007999777793884
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,32,2,0,0.016908800601959227
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,32,4,0,0.015020799636840821
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,32,8,0,0.014800000190734863
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,32,32,0,0.01478399932384491
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,32,16,0,0.014764800667762756
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,32,64,0,0.01488800048828125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,32,1,0,0.029287999868392943
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,32,2,0,0.02513279914855957
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,32,4,0,0.02311519980430603
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,32,8,0,0.023025600612163542
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,32,16,0,0.02290239930152893
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,32,32,0,0.022910399734973906
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,32,64,0,0.02109439969062805
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,64,1,0,0.024993599951267244
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,64,2,0,0.018768000602722167
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,64,4,0,0.01671999990940094
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,64,8,0,0.016947199404239655
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,64,16,0,0.014846399426460266
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,64,32,0,0.014985600113868713
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,64,64,0,0.014873600006103516
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,64,1,0,0.03357279896736145
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,64,2,0,0.027220800518989563
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,64,4,0,0.025172799825668335
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,64,8,0,0.024982400238513947
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,64,16,0,0.02312159985303879
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,64,32,0,0.022881600260734557
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,64,64,0,0.023076799511909486
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,128,1,0,0.03129920065402984
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,128,2,0,0.022443200647830962
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,128,4,0,0.018929600715637207
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,128,8,0,0.01703680008649826
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,128,16,0,0.016806399822235106
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,128,64,0,0.014921599626541137
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,128,32,0,0.01488959938287735
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,128,1,0,0.04192320108413696
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,128,2,0,0.03128960132598877
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,128,4,0,0.02526400089263916
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,128,8,0,0.02516320049762726
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,128,16,0,0.0230880007147789
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,128,32,0,0.023027199506759643
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,128,64,0,0.02309280037879944
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,256,1,0,0.047672000527381894
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,256,2,0,0.03131360113620758
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,256,4,0,0.025070399045944214
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,256,8,0,0.020985600352287293
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,256,32,0,0.018990400433540344
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,256,16,0,0.020924800634384157
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,256,64,0,0.019065600633621217
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,256,1,0,0.06228640079498291
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,256,2,0,0.041540798544883725
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,256,4,0,0.035308799147605895
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,256,8,0,0.029246398806571962
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,256,16,0,0.027140799164772033
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,256,32,0,0.027195200324058533
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,256,64,0,0.025171199440956117
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,512,1,0,0.09270560145378112
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,512,2,0,0.05574719905853272
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,512,4,0,0.03543359935283661
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,512,16,0,0.027009600400924684
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,512,8,0,0.029099199175834655
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,512,32,0,0.02503199875354767
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,512,64,0,0.025007998943328856
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,512,1,0,0.11290719509124755
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,512,2,0,0.06814240217208863
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,512,4,0,0.04361119866371155
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,512,8,0,0.03746879994869232
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,512,16,0,0.03301919996738434
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,512,32,0,0.031224000453948974
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,512,64,0,0.03110400140285492
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,1024,1,0,0.1899616003036499
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,1024,4,0,0.06688960194587708
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,1024,2,0,0.10863679647445679
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,1024,8,0,0.044268798828125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,1024,16,0,0.037083199620246886
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,1024,32,0,0.035041600465774536
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,1024,64,0,0.03335680067539215
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,1024,1,0,0.2226464033126831
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,1024,2,0,0.12760000228881835
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,1024,4,0,0.07637280225753784
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,1024,16,0,0.044059199094772336
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,1024,8,0,0.051718401908874514
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,1024,32,0,0.04137279987335205
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,1024,64,0,0.03942880034446716
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,1536,1,0,0.30743680000305174
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,1536,2,0,0.1705135941505432
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,1536,4,0,0.10184320211410522
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,1536,8,0,0.0659280002117157
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,1536,16,0,0.04975839853286743
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,1536,32,0,0.04562239944934845
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,1536,64,0,0.04152800142765045
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,1536,1,0,0.3424367904663086
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,1536,2,0,0.19348640441894532
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,1536,4,0,0.11555839776992798
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,1536,8,0,0.07641280293464661
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,1536,16,0,0.0558031976222992
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,1536,32,0,0.051819199323654176
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,1536,64,0,0.04567039906978607
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,2048,1,0,0.43911042213439944
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,2048,2,0,0.24351999759674073
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,2048,4,0,0.14179199934005737
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,2048,8,0,0.08873440027236938
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,2048,16,0,0.061887997388839724
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,2048,32,0,0.05371999740600586
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,2048,64,0,0.051660799980163576
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,2048,2,0,0.26554720401763915
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,2048,1,0,0.4750016212463379
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,2048,8,0,0.09870719909667969
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,2048,4,0,0.15419679880142212
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,2048,16,0,0.06713600158691406
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,2048,32,0,0.06196320056915283
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,2048,64,0,0.05782880187034607
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,3072,1,0,0.7807087898254395
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,3072,2,0,0.41633920669555663
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,3072,4,0,0.23705599308013917
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,3072,8,0,0.14266879558563234
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,3072,16,0,0.09430239796638488
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,3072,32,0,0.07620959877967834
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,3072,64,0,0.070033597946167
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,3072,1,0,0.7874063968658447
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,3072,2,0,0.4265376091003418
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,3072,4,0,0.2443903923034668
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,3072,8,0,0.1481824040412903
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,3072,16,0,0.10117759704589843
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,3072,32,0,0.07837600111961365
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,3072,64,0,0.07411680221557618
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,4096,1,0,1.187390422821045
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,4096,2,0,0.6488160133361817
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,4096,4,0,0.35449919700622556
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,4096,8,0,0.20792479515075685
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,4096,16,0,0.13502240180969238
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,4096,32,0,0.09672319889068604
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,4096,64,0,0.0906607985496521
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,4096,1,0,1.1490511894226074
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,4096,2,0,0.616480016708374
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,4096,4,0,0.34996960163116453
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,4096,8,0,0.20850720405578613
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,4096,16,0,0.14018080234527588
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,4096,64,0,0.09064800143241883
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,4096,32,0,0.09885280132293701
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,6144,1,0,2.307489585876465
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,6144,2,0,1.188422393798828
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,6144,4,0,0.660484790802002
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,6144,8,0,0.3671663999557495
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,6144,16,0,0.22649118900299073
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,6144,64,0,0.12714719772338867
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,6144,32,0,0.1538815975189209
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,6144,1,0,2.0532384872436524
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,6144,2,0,1.0835984230041504
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,6144,4,0,0.5975728034973145
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,6144,8,0,0.34596478939056396
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,6144,16,0,0.21771039962768554
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,6144,32,0,0.15562399625778198
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,6144,64,0,0.12558560371398925
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,8192,1,0,3.686201477050781
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,8192,2,0,1.8930992126464843
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,8192,4,0,1.0263615608215333
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,8192,8,0,0.5824848175048828
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,8192,16,0,0.34206080436706543
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,8192,32,0,0.22630560398101807
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,8192,64,0,0.16641119718551636
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,8192,1,0,3.2137374877929688
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,8192,2,0,1.6559455871582032
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,8192,4,0,0.8950608253479004
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,8192,8,0,0.5114575862884522
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,8192,16,0,0.3165679931640625
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,8192,32,0,0.2171152114868164
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,8192,64,0,0.16037440299987793
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,10240,1,0,5.699305725097656
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,10240,2,0,2.9210208892822265
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,10240,4,0,1.4709471702575683
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,10240,16,0,0.4858240127563477
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,10240,8,0,0.8189743995666504
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,10240,64,0,0.21316640377044677
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,10240,32,0,0.3069727897644043
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,10240,4,0,1.2541744232177734
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,10240,2,0,2.355155181884766
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,10240,1,0,4.594095993041992
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,10240,8,0,0.7055007934570312
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,10240,16,0,0.42584958076477053
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,10240,32,0,0.28402080535888674
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,10240,64,0,0.20097119808197023
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,12288,4,0,2.0337263107299806
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,12288,8,0,1.124828815460205
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,12288,1,0,7.795350646972656
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,12288,2,0,3.902312088012695
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,12288,32,0,0.40504961013793944
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,12288,16,0,0.6336415767669678
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,12288,64,0,0.26829919815063474
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,12288,1,0,6.161366271972656
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,12288,4,0,1.6670480728149415
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,12288,8,0,0.9216927528381348
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,12288,2,0,3.1864927291870115
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,12288,16,0,0.5475376129150391
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,12288,32,0,0.3554608106613159
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,12288,64,0,0.254694390296936
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,16384,8,0,1.7663871765136718
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,16384,4,0,3.372137451171875
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,16384,2,0,6.676659393310547
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,16384,1,0,14.186875915527343
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,16384,32,0,0.6118847846984863
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,16384,16,0,0.9959088325500488
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,4,16384,64,0,0.40978078842163085
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,16384,8,0,1.4404447555541993
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,16384,4,0,2.6540096282958983
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,16384,1,0,10.391563415527344
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,16384,2,0,5.1574352264404295
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,16384,16,0,0.8356592178344726
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,16384,32,0,0.5262815952301025
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,1,1,0,0.026366400718688964
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,4,16384,64,0,0.370142388343811
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,1,2,0,0.020902399718761445
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,1,4,0,0.014936000108718872
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,1,8,0,0.014873600006103516
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,1,16,0,0.014961600303649902
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,1,32,0,0.014793600142002105
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,1,64,0,0.013873599469661713
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,1,1,0,0.03321920037269592
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,1,2,0,0.027137601375579835
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,1,4,0,0.023270399868488313
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,1,8,0,0.02298559993505478
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,1,16,0,0.020843200385570526
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,1,32,0,0.021011200547218323
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,1,64,0,0.022912000119686127
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,16,1,0,0.02524479925632477
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,16,2,0,0.01883520036935806
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,16,4,0,0.016841599345207216
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,16,8,0,0.014880000054836274
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,16,16,0,0.01480800062417984
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,16,32,0,0.014753599464893342
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,16,64,0,0.014875200390815736
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,16,1,0,0.0333840012550354
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,16,2,0,0.02723039984703064
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,16,4,0,0.023060800135135652
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,16,8,0,0.02306559979915619
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,16,16,0,0.022991999983787537
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,16,32,0,0.02218720018863678
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,16,64,0,0.02105119973421097
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,32,1,0,0.029203200340270997
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,32,2,0,0.020891200006008147
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,32,4,0,0.016927999258041383
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,32,8,0,0.014878399670124054
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,32,16,0,0.015121600031852723
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,32,32,0,0.014796799421310425
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,32,64,0,0.014947199821472168
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,32,1,0,0.03727200031280518
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,32,2,0,0.02858720123767853
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,32,4,0,0.024902400374412537
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,32,8,0,0.022899200022220612
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,32,16,0,0.022862400114536285
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,32,32,0,0.022891199588775633
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,32,64,0,0.02099359929561615
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,64,2,0,0.025036799907684325
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,64,4,0,0.018961599469184874
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,64,1,0,0.035252800583839415
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,64,8,0,0.01722719967365265
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,64,16,0,0.01478559970855713
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,64,32,0,0.015936000645160674
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,64,64,0,0.014846399426460266
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,64,1,0,0.04757120013237
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,64,2,0,0.033102399110794066
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,64,4,0,0.025035199522972108
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,64,8,0,0.025009599328041077
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,64,16,0,0.022864000499248506
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,64,32,0,0.023004800081253052
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,64,64,0,0.023017600178718567
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,128,2,0,0.03316639959812164
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,128,1,0,0.04968799948692322
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,128,4,0,0.022907200455665588
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,128,8,0,0.01892320066690445
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,128,16,0,0.018863999843597413
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,128,32,0,0.01684480011463165
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,128,64,0,0.016884799301624297
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,128,1,0,0.06402720212936401
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,128,2,0,0.043584001064300534
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,128,4,0,0.03314880132675171
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,128,8,0,0.029206401109695433
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,128,16,0,0.026952001452445983
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,128,32,0,0.023104000091552734
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,128,64,0,0.023075200617313385
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,256,1,0,0.08649119734764099
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,256,2,0,0.049537599086761475
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,256,4,0,0.03127520084381104
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,256,8,0,0.02699359953403473
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,256,16,0,0.023180800676345825
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,256,32,0,0.022830399870872497
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,256,64,0,0.02110079973936081
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,256,1,0,0.10716639757156372
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,256,2,0,0.06385599970817565
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,256,4,0,0.04338400065898895
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,256,8,0,0.035857599973678586
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,256,16,0,0.031064000725746155
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,256,64,0,0.02717919945716858
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,256,32,0,0.029080000519752503
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,512,1,0,0.1646944046020508
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,512,4,0,0.05793120265007019
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,512,2,0,0.09467840194702148
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,512,8,0,0.03927040100097656
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,512,16,0,0.03154720067977905
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,512,32,0,0.029123198986053467
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,512,64,0,0.02709279954433441
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,512,1,0,0.2043760061264038
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,512,2,0,0.1163856029510498
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,512,4,0,0.07211520075798035
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,512,16,0,0.041503998637199405
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,512,8,0,0.04949440062046051
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,512,32,0,0.03537279963493347
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,512,64,0,0.03530719876289368
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,1024,1,0,0.35354878902435305
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,1024,2,0,0.1937440037727356
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,1024,4,0,0.11483680009841919
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,1024,8,0,0.07091519832611085
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,1024,16,0,0.048404800891876223
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,1024,32,0,0.0435232013463974
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,1024,64,0,0.03958880007266998
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,1024,1,0,0.40977277755737307
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,1024,2,0,0.22922239303588868
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,1024,4,0,0.1337488055229187
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,1024,8,0,0.08457599878311158
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,1024,16,0,0.06003999710083008
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,1024,32,0,0.05360159873962402
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,1024,64,0,0.04754559993743897
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,1536,1,0,0.5763296127319336
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,1536,2,0,0.3121376037597656
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,1536,4,0,0.18060959577560426
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,1536,8,0,0.10942879915237427
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,1536,16,0,0.07426239848136902
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,1536,32,0,0.057897597551345825
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,1536,64,0,0.053668802976608275
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,1536,1,0,0.6448239803314209
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,1536,2,0,0.35098559856414796
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,1536,4,0,0.20276639461517335
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,1536,8,0,0.12537440061569213
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,1536,16,0,0.0843936026096344
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,1536,32,0,0.068476802110672
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,1536,64,0,0.05987840294837952
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,2048,1,0,0.8687600135803223
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,2048,2,0,0.4552000045776367
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,2048,4,0,0.2561503887176514
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,2048,8,0,0.1541152000427246
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,2048,16,0,0.1011423945426941
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,2048,32,0,0.07231040000915527
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,2048,64,0,0.06607199907302856
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,2048,1,0,0.9069984436035157
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,2048,2,0,0.4915791988372803
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,2048,4,0,0.27799038887023925
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,2048,8,0,0.16651359796524048
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,2048,16,0,0.11314239501953124
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,2048,32,0,0.0823199987411499
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,2048,64,0,0.07221119999885559
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,3072,2,0,0.7933695793151856
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,3072,4,0,0.4409664154052734
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,3072,1,0,1.501148796081543
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,3072,8,0,0.2564079999923706
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,3072,16,0,0.16018240451812743
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,3072,32,0,0.1126304030418396
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,3072,64,0,0.09262080192565918
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,3072,1,0,1.5207712173461914
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,3072,2,0,0.8061360359191895
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,3072,4,0,0.44846401214599607
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,3072,8,0,0.26473920345306395
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,3072,16,0,0.17055679559707643
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,3072,32,0,0.12552000284194947
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,3072,64,0,0.1009376049041748
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,4096,1,0,2.321681594848633
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,4096,2,0,1.2234335899353028
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,4096,8,0,0.3815664052963257
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,4096,4,0,0.6574480056762695
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,4096,16,0,0.23188478946685792
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,4096,32,0,0.15813440084457397
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,4096,64,0,0.11939200162887573
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,4096,1,0,2.245635223388672
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,4096,2,0,1.1803119659423829
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,4096,8,0,0.37637279033660886
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,4096,4,0,0.6445871829986572
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,4096,16,0,0.2361311912536621
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,4096,64,0,0.12872159481048584
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,4096,32,0,0.1684991955757141
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,6144,1,0,4.528910446166992
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,6144,4,0,1.2610960006713867
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,6144,2,0,2.2990272521972654
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,6144,8,0,0.6807119846343994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,6144,16,0,0.40392317771911623
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,6144,32,0,0.2701263904571533
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,6144,64,0,0.1883072018623352
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,6144,1,0,4.051161575317383
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,6144,4,0,1.1305055618286133
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,6144,2,0,2.0948112487792967
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,6144,8,0,0.6335487842559815
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,6144,16,0,0.3876352071762085
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,6144,64,0,0.19560320377349855
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,6144,32,0,0.2595952033996582
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,8192,2,0,3.8483871459960937
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,8192,4,0,1.9479936599731444
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,8192,1,0,7.507444763183594
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,8192,8,0,1.0928447723388672
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,8192,16,0,0.6172319889068604
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,8192,32,0,0.3904831886291504
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,8,8192,64,0,0.2810352087020874
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,8192,1,0,6.407408142089844
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,8192,2,0,3.2476127624511717
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,8192,4,0,1.712758445739746
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,8192,8,0,0.9507951736450195
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,8192,32,0,0.3707119941711426
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,8192,16,0,0.5703887939453125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,1,1,0,0.03240320086479187
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,8,8192,64,0,0.2719583988189697
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,1,2,0,0.024977600574493407
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,1,4,0,0.022448000311851502
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,1,8,0,0.01488800048828125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,1,16,0,0.014740799367427827
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,1,32,0,0.01491519957780838
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,1,64,0,0.014934399724006652
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,1,1,0,0.03745439946651459
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,1,2,0,0.031139200925827025
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,1,4,0,0.02497439980506897
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,1,8,0,0.022892799973487855
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,1,16,0,0.02096319943666458
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,1,32,0,0.02093919962644577
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,1,64,0,0.02104160040616989
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,16,1,0,0.037567999958992
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,16,2,0,0.025068798661231996
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,16,4,0,0.020873600244522096
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,16,8,0,0.016441600024700166
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,16,16,0,0.01680160015821457
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,16,32,0,0.014998400211334228
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,16,64,0,0.014923200011253357
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,16,1,0,0.04552960097789764
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,16,2,0,0.03326559960842133
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,16,4,0,0.02709279954433441
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,16,8,0,0.022966399788856506
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,16,16,0,0.02296479940414429
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,16,32,0,0.02298240065574646
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,32,1,0,0.04356000125408173
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,16,64,0,0.02295520007610321
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,32,2,0,0.031198400259017944
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,32,4,0,0.023027199506759643
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,32,8,0,0.01690559983253479
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,32,16,0,0.01586720049381256
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,32,32,0,0.01478559970855713
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,32,64,0,0.016867199540138246
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,32,1,0,0.05382879972457886
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,32,2,0,0.03743039965629578
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,32,4,0,0.02908639907836914
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,32,8,0,0.02502399981021881
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,32,16,0,0.023105600476264955
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,32,32,0,0.023523199558258056
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,32,64,0,0.02106879949569702
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,64,1,0,0.05794399976730347
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,64,2,0,0.03731839954853058
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,64,4,0,0.027004799246788024
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,64,8,0,0.018680000305175783
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,64,16,0,0.01693120002746582
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,64,32,0,0.01695519983768463
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,64,64,0,0.015334400534629821
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,64,1,0,0.07247679829597473
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,64,2,0,0.047598400712013246
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,64,4,0,0.0334879994392395
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,64,8,0,0.027249601483345032
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,64,16,0,0.025036799907684325
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,64,32,0,0.02499520033597946
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,64,64,0,0.02496960014104843
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,128,1,0,0.08846399784088135
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,128,2,0,0.05154079794883728
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,128,4,0,0.03332479894161224
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,128,8,0,0.02495039999485016
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,128,16,0,0.018990400433540344
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,128,32,0,0.01886720061302185
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,128,64,0,0.017888000607490538
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,128,1,0,0.11075199842453003
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,128,2,0,0.06625919938087463
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,128,4,0,0.04352160096168518
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,128,8,0,0.033353599905967715
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,128,16,0,0.02905920147895813
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,128,64,0,0.025110399723052977
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,128,32,0,0.027079999446868896
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,256,1,0,0.154204797744751
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,256,2,0,0.08923360109329223
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,256,4,0,0.053527998924255374
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,256,16,0,0.02922559976577759
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,256,8,0,0.033899199962615964
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,256,32,0,0.02701599895954132
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,256,64,0,0.02524000108242035
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,256,1,0,0.19392000436782836
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,256,2,0,0.11067839860916137
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,256,4,0,0.06611520051956177
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,256,8,0,0.04575519859790802
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,256,16,0,0.03747999966144562
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,256,32,0,0.03348160088062287
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,256,64,0,0.0313264012336731
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,512,1,0,0.30763840675354004
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,512,2,0,0.17180479764938356
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,512,4,0,0.10072159767150879
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,512,8,0,0.06385759711265564
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,512,16,0,0.043721601366996765
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,512,32,0,0.03751200139522552
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,512,64,0,0.03525919914245605
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,512,1,0,0.3766223907470703
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,512,2,0,0.21201760768890382
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,512,4,0,0.12363040447235107
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,512,8,0,0.07830399870872498
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,512,16,0,0.05479199886322021
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,512,32,0,0.047537600994110106
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,512,64,0,0.04344640076160431
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,1024,2,0,0.36359999179840086
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,1024,1,0,0.6724751949310303
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,1024,4,0,0.2098207950592041
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,1024,8,0,0.12544959783554077
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,1024,16,0,0.08440319895744323
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,1024,32,0,0.0599120020866394
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,1024,64,0,0.053755199909210204
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,1024,1,0,0.7833839893341065
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,1024,2,0,0.42372798919677734
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,1024,4,0,0.24361600875854492
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,1024,8,0,0.1468224048614502
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,1024,16,0,0.09932640194892883
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,1024,32,0,0.07223359942436218
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,1024,64,0,0.06606720089912414
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,1536,1,0,1.1431599617004395
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,1536,2,0,0.5950975894927979
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,1536,4,0,0.3320159912109375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,1536,8,0,0.1979439973831177
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,1536,16,0,0.12767039537429808
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,1536,32,0,0.09271680116653443
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,1536,64,0,0.07422080039978027
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,1536,2,0,0.6672927856445312
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,1536,1,0,1.2492143630981445
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,1536,4,0,0.3736799955368042
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,1536,8,0,0.22378239631652833
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,1536,16,0,0.14714560508728028
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,1536,32,0,0.10714399814605713
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,1536,64,0,0.08648639917373657
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,2048,1,0,1.6672655105590821
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,2048,2,0,0.8846639633178711
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,2048,4,0,0.4755119800567627
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,2048,8,0,0.27835841178894044
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,2048,16,0,0.1770527958869934
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,2048,32,0,0.12739839553833007
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,2048,64,0,0.09402880072593689
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,2048,1,0,1.7699520111083984
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,2048,2,0,0.9347455978393555
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,2048,4,0,0.5167391777038575
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,2048,8,0,0.3043152093887329
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,2048,16,0,0.1964975953102112
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,2048,32,0,0.1415984034538269
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,2048,64,0,0.11085920333862305
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,3072,1,0,2.9611568450927734
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,3072,2,0,1.5392127990722657
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,3072,4,0,0.8325200080871582
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,3072,8,0,0.4754672050476074
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,3072,16,0,0.290451192855835
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,3072,32,0,0.1990432024002075
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,3072,64,0,0.14801440238952637
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,3072,1,0,3.004622459411621
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,3072,4,0,0.8448944091796875
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,3072,8,0,0.4868959903717041
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,3072,2,0,1.5614912033081054
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,3072,16,0,0.3068416118621826
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,3072,32,0,0.21140000820159913
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,3072,64,0,0.1652384042739868
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,4096,1,0,4.6386161804199215
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,4096,2,0,2.466579246520996
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,4096,8,0,0.7025887966156006
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,4096,4,0,1.2606271743774413
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,4096,16,0,0.420468807220459
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,16,4096,64,0,0.21151840686798096
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,4096,32,0,0.28533918857574464
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,4096,1,0,4.435833740234375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,4096,2,0,2.302872085571289
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,4096,8,0,0.701416015625
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,4096,4,0,1.2363039970397949
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,4096,32,0,0.29418559074401857
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,16,4096,64,0,0.22064800262451173
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,4096,16,0,0.429966402053833
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,1,1,0,0.060945600271224976
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,1,2,0,0.03119199872016907
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,1,4,0,0.022894400358200073
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,1,8,0,0.01931679993867874
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,1,16,0,0.014884799718856812
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,1,32,0,0.014971199631690978
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,32,1,64,0,0.014753599464893342
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,1,1,0,0.05580959916114807
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,1,2,0,0.03746080100536346
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,1,4,0,0.031070399284362792
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,1,8,0,0.026979199051856993
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,1,16,0,0.0225600004196167
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,1,32,0,0.02321600019931793
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,32,1,64,0,0.022991999983787537
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,16,1,0,0.05987200140953064
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,16,2,0,0.03740800023078918
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,16,4,0,0.027156800031661987
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,16,8,0,0.019182400405406953
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,16,16,0,0.01674560010433197
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,16,32,0,0.01496479958295822
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,32,16,64,0,0.014894400537014008
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,16,1,0,0.07021920084953308
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,16,2,0,0.046147200465202334
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,16,4,0,0.03330560028553009
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,16,8,0,0.02717440128326416
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,16,16,0,0.022940799593925476
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,16,32,0,0.02298559993505478
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,32,16,64,0,0.023024000227451324
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,32,1,0,0.07218719720840454
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,32,2,0,0.045630401372909545
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,32,4,0,0.031097599864006044
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,32,8,0,0.0216511994600296
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,32,16,0,0.016945600509643555
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,32,32,0,0.01690399944782257
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,32,32,64,0,0.01671839952468872
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,32,1,0,0.08483999967575073
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,32,2,0,0.0540448009967804
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,32,4,0,0.03933759927749634
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,32,8,0,0.029019200801849367
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,32,16,0,0.025088000297546386
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,32,32,0,0.023998400568962096
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,32,32,64,0,0.02287199944257736
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,64,1,0,0.10498720407485962
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,64,2,0,0.0580128014087677
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,64,4,0,0.03787040114402771
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,64,8,0,0.02714880108833313
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,64,16,0,0.02083680033683777
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,64,32,0,0.019099199771881105
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,32,64,64,0,0.01687999963760376
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,64,1,0,0.12786079645156861
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,64,2,0,0.07267040014266968
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,64,4,0,0.049779200553894044
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,64,8,0,0.037371200323104856
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,64,16,0,0.029148799180984498
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,64,32,0,0.02712480127811432
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,128,1,0,0.16030240058898926
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,128,2,0,0.09052960276603698
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,32,64,64,0,0.02497279942035675
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,128,4,0,0.05383679866790771
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,128,16,0,0.026976001262664796
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,128,32,0,0.023054400086402894
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,32,128,64,0,0.021087999641895293
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,128,8,0,0.03721440136432648
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,128,1,0,0.20378880500793456
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,128,2,0,0.11524159908294677
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,128,4,0,0.0692848026752472
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,128,8,0,0.04763039946556091
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,128,16,0,0.03737280070781708
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,128,32,0,0.031307199597358705
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,32,128,64,0,0.031167998909950256
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,256,1,0,0.29093759059906005
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,256,2,0,0.16220639944076537
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,256,8,0,0.05666080117225647
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,256,4,0,0.09498080015182495
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,256,16,0,0.03932960033416748
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,256,32,0,0.035016000270843506
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,32,256,64,0,0.031241598725318908
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,256,1,0,0.3578687906265259
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,256,2,0,0.19968800544738768
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,256,4,0,0.11781120300292969
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,256,8,0,0.07213280200958253
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,256,16,0,0.051888000965118405
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,256,32,0,0.045428800582885745
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,32,256,64,0,0.040041598677635196
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,512,1,0,0.5884560108184814
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,512,2,0,0.3199552059173584
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,512,4,0,0.18255360126495362
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,512,8,0,0.11301920413970948
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,512,16,0,0.07443360090255738
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,32,512,64,0,0.049721598625183105
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,512,32,0,0.05371999740600586
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,512,1,0,0.7222591876983643
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,512,2,0,0.3905456066131592
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,512,4,0,0.22288479804992675
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,512,8,0,0.13766080141067505
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,512,16,0,0.09300320148468018
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,512,32,0,0.0680191993713379
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,32,512,64,0,0.06003519892692566
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,1024,1,0,1.3344032287597656
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,1024,4,0,0.38629119396209716
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,1024,2,0,0.696614408493042
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,1024,8,0,0.2303920030593872
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,1024,16,0,0.15012799501419066
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,1024,32,0,0.10732959508895874
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,32,1024,64,0,0.08128640055656433
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,1024,1,0,1.5311920166015625
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,1024,2,0,0.8132160186767579
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,1024,4,0,0.4511824131011963
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,1024,8,0,0.26893439292907717
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,1024,16,0,0.17541600465774537
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,1024,32,0,0.12912319898605346
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,32,1024,64,0,0.1002128005027771
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,1536,1,0,2.2283695220947264
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,1536,2,0,1.157423973083496
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,1536,4,0,0.636681604385376
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,1536,8,0,0.36486239433288575
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,1536,16,0,0.23366079330444336
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,1536,32,0,0.16268960237503052
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,32,1536,64,0,0.1262336015701294
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,1536,1,0,2.45755672454834
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,1536,2,0,1.2911760330200195
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,1536,4,0,0.7058303833007813
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,1536,8,0,0.41390562057495117
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,1536,16,0,0.26633760929107664
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,1536,32,0,0.18897600173950196
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,32,1536,64,0,0.14855200052261353
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,2048,1,0,3.349806213378906
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,2048,2,0,1.7038496017456055
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,2048,4,0,0.9204671859741211
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,2048,8,0,0.5187856197357178
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,2048,16,0,0.3262255907058716
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,2048,32,0,0.2239392042160034
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,32,2048,64,0,0.1715216040611267
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,2048,8,0,0.5690800189971924
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,2048,1,0,3.5032272338867188
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,2048,4,0,0.9881343841552734
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,2048,2,0,1.8345983505249024
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,32,2048,64,0,0.19610559940338135
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,2048,16,0,0.360697603225708
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,1,1,0,0.10250240564346313
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,2048,32,0,0.2524768114089966
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,1,2,0,0.04960159957408905
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,1,8,0,0.022908799350261688
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,1,4,0,0.03105599880218506
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,1,16,0,0.018982400000095368
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,64,1,64,0,0.014763200283050537
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,1,1,0,0.08857600092887878
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,1,2,0,0.05570880174636841
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,1,32,0,0.01676799952983856
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,1,4,0,0.03933280110359192
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,1,8,0,0.031198400259017944
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,1,16,0,0.02587839961051941
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,1,32,0,0.02305919975042343
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,64,1,64,0,0.022870400547981264
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,16,1,0,0.10476640462875367
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,16,2,0,0.059867197275161745
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,16,4,0,0.0372655987739563
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,16,8,0,0.02678079903125763
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,16,16,0,0.018958400189876556
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,16,32,0,0.01693280041217804
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,64,16,64,0,0.014920000731945039
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,16,1,0,0.118014395236969
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,16,2,0,0.07017760276794434
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,16,4,0,0.045660799741744994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,16,8,0,0.035283198952674864
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,16,16,0,0.027251198887825012
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,16,32,0,0.023038400709629057
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,32,1,0,0.13750079870224
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,64,16,64,0,0.02296479940414429
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,32,2,0,0.07635520100593567
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,32,4,0,0.045561599731445315
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,32,8,0,0.03126400113105774
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,32,16,0,0.02306240051984787
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,32,32,0,0.018940800428390504
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,64,32,64,0,0.01879200041294098
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,32,1,0,0.15410399436950684
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,32,2,0,0.08856639862060547
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,32,4,0,0.05570240020751953
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,32,8,0,0.039419201016426084
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,32,16,0,0.03129920065402984
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,32,32,0,0.02535040080547333
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,64,32,64,0,0.025441598892211915
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,64,1,0,0.193231999874115
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,64,2,0,0.10741599798202514
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,64,4,0,0.06196320056915283
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,64,8,0,0.04139840006828308
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,64,16,0,0.029228800535202028
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,64,32,0,0.023068800568580627
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,64,64,64,0,0.021590399742126464
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,64,1,0,0.23480639457702637
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,64,2,0,0.1298815965652466
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,64,4,0,0.07822880148887634
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,64,8,0,0.05184000134468079
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,64,16,0,0.03949120044708252
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,64,32,0,0.031118398904800414
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,64,64,64,0,0.030134400725364684
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,128,1,0,0.3017040014266968
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,128,2,0,0.16540640592575073
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,128,4,0,0.09530400037765503
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,128,16,0,0.041552001237869264
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,128,8,0,0.059915202856063846
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,128,32,0,0.03312320113182068
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,64,128,64,0,0.028228801488876343
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,128,1,0,0.3801647901535034
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,128,2,0,0.20926399230957032
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,128,4,0,0.12142560482025147
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,128,8,0,0.07620480060577392
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,128,16,0,0.053472000360488894
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,128,32,0,0.043424001336097716
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,64,128,64,0,0.03728959858417511
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,256,1,0,0.5587168216705323
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,256,2,0,0.3034800052642822
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,256,8,0,0.10505919456481934
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,256,4,0,0.17274559736251832
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,256,16,0,0.06813920140266419
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,256,32,0,0.051862400770187375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,64,256,64,0,0.04555999934673309
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,256,1,0,0.6860095977783203
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,256,2,0,0.37544639110565187
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,256,4,0,0.21371359825134278
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,256,8,0,0.13185919523239137
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,256,16,0,0.08859999775886536
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,256,32,0,0.06608800292015075
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,64,256,64,0,0.05987359881401062
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,512,1,0,1.1535391807556152
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,512,2,0,0.6128015995025635
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,512,4,0,0.34097440242767335
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,512,8,0,0.20477759838104248
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,512,16,0,0.13389600515365602
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,512,32,0,0.09616960287094116
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,64,512,64,0,0.07420480251312256
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,512,1,0,1.4086015701293946
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,512,2,0,0.7452032089233398
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,512,4,0,0.41672320365905763
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,512,8,0,0.2517647981643677
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,512,16,0,0.16598559617996217
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,512,32,0,0.12099360227584839
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,64,512,64,0,0.09707840085029602
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,1024,1,0,2.6105344772338865
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,1024,2,0,1.3796223640441894
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,1024,4,0,0.7452447891235352
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,1024,8,0,0.42977118492126465
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,1024,16,0,0.2735039949417114
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,1024,32,0,0.19313759803771974
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,64,1024,64,0,0.15032479763031006
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,1024,1,0,3.0269567489624025
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,1024,4,0,0.8655327796936035
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,1024,2,0,1.5852144241333008
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,1024,8,0,0.5031487941741943
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,1024,16,0,0.32414560317993163
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,1024,32,0,0.23200159072875975
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,1,1,0,0.17303680181503295
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,64,1024,64,0,0.1804911971092224
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,1,2,0,0.08433279991149903
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,1,4,0,0.0495743989944458
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,1,8,0,0.031140801310539246
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,1,16,0,0.023127999901771546
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,1,32,0,0.019020800292491914
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,1,1,0,0.15818560123443604
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,128,1,64,0,0.01663520038127899
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,1,2,0,0.09052320122718811
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,1,4,0,0.05575680136680603
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,1,8,0,0.039375999569892885
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,1,16,0,0.031196799874305726
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,1,32,0,0.024953599274158477
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,128,1,64,0,0.022808000445365906
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,16,1,0,0.19930880069732665
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,16,2,0,0.10525920391082763
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,16,4,0,0.06196320056915283
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,16,16,0,0.027153599262237548
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,16,8,0,0.03948479890823364
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,16,32,0,0.021033599972724915
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,128,16,64,0,0.018875199556350707
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,16,1,0,0.2182543992996216
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,16,2,0,0.11946239471435546
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,16,4,0,0.07237600088119507
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,16,8,0,0.04753119945526123
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,16,16,0,0.03699199855327606
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,16,32,0,0.029209598898887634
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,128,16,64,0,0.02696160078048706
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,32,1,0,0.25848960876464844
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,32,2,0,0.13821120262145997
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,32,4,0,0.07820000052452088
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,32,8,0,0.04756959974765777
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,32,16,0,0.03527520000934601
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,32,32,0,0.02696320116519928
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,128,32,64,0,0.020819200575351714
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,32,1,0,0.28694400787353513
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,32,2,0,0.15684959888458253
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,32,4,0,0.09060320258140564
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,32,8,0,0.05792639851570129
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,32,16,0,0.04347360134124756
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,32,32,0,0.03514559864997864
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,128,32,64,0,0.029281601309776306
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,64,2,0,0.19873440265655518
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,64,1,0,0.36795361042022706
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,64,4,0,0.11359039545059205
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,64,8,0,0.06748160123825073
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,64,16,0,0.047513601183891294
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,64,32,0,0.03531199991703034
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,128,64,64,0,0.02919679880142212
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,64,1,0,0.44353442192077636
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,64,2,0,0.24003679752349855
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,64,4,0,0.13768160343170166
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,64,8,0,0.08509759902954102
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,64,16,0,0.059943997859954835
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,64,32,0,0.04565120041370392
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,128,64,64,0,0.03805440068244934
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,128,1,0,0.5816527843475342
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,128,2,0,0.3103024005889893
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,128,4,0,0.17655359506607055
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,128,8,0,0.10539040565490723
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,128,16,0,0.07181919813156128
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,128,32,0,0.053692799806594846
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,128,128,64,0,0.043558400869369504
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,128,1,0,0.7289120197296143
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,128,2,0,0.3919615983963013
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,128,4,0,0.22381598949432374
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,128,8,0,0.135697603225708
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,128,16,0,0.0921343982219696
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,128,32,0,0.06820480227470398
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,128,128,64,0,0.05763999819755554
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,256,1,0,1.0892815589904785
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,256,2,0,0.5797232151031494
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,256,4,0,0.3227519989013672
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,256,8,0,0.19500960111618043
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,256,32,0,0.09117760062217713
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,256,16,0,0.12662559747695923
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,128,256,64,0,0.07212960124015808
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,256,1,0,1.3440671920776368
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,256,2,0,0.7112287998199462
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,256,4,0,0.3992095947265625
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,256,8,0,0.2401599884033203
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,256,16,0,0.15898239612579346
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,256,32,0,0.11674879789352417
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,128,256,64,0,0.09415199756622314
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,512,1,0,2.281083106994629
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,512,2,0,1.2061792373657227
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,512,4,0,0.6553887844085693
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,512,8,0,0.3848623991012573
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,512,16,0,0.24643681049346924
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,512,32,0,0.17671680450439453
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,128,512,64,0,0.13984160423278807
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,512,1,0,2.782459259033203
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,512,4,0,0.8019519805908203
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,512,2,0,1.4615039825439453
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,512,8,0,0.4711952209472656
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,512,16,0,0.30616159439086915
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,512,32,0,0.22021279335021973
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,128,512,64,0,0.1746000051498413
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,1,1,0,0.29530720710754393
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,1,4,0,0.0844048023223877
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,1,2,0,0.1617136001586914
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,1,8,0,0.04973120093345642
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,1,16,0,0.03110719919204712
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,1,32,0,0.023068800568580627
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,256,1,64,0,0.017025600373744964
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,1,1,0,0.2977263927459717
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,1,2,0,0.15852160453796388
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,1,4,0,0.09069439768791199
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,1,8,0,0.05577920079231262
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,1,16,0,0.03726719915866852
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,1,32,0,0.03091840147972107
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,256,1,64,0,0.025088000297546386
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,16,2,0,0.20162560939788818
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,16,1,0,0.3843487977981567
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,16,4,0,0.10897599458694458
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,16,8,0,0.06380320191383362
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,16,16,0,0.041652798652648926
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,16,32,0,0.0312608003616333
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,256,16,64,0,0.025094398856163026
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,16,1,0,0.4063375949859619
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,16,2,0,0.22144479751586915
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,16,4,0,0.12531360387802123
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,16,8,0,0.07613919973373413
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,16,16,0,0.049737599492073056
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,16,32,0,0.03931359946727753
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,256,16,64,0,0.03314720094203949
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,32,1,0,0.49807038307189944
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,32,2,0,0.2639039993286133
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,32,4,0,0.14391839504241943
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,32,8,0,0.08259199857711792
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,32,16,0,0.05379040241241455
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,32,32,0,0.04134719967842102
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,256,32,64,0,0.031123200058937074
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,32,2,0,0.2933664083480835
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,32,1,0,0.5464176177978516
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,32,4,0,0.1643504023551941
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,32,8,0,0.09957280158996581
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,32,16,0,0.06597599983215333
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,32,32,0,0.05169600248336792
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,256,32,64,0,0.04148319959640503
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,64,1,0,0.7155856132507324
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,64,2,0,0.37776799201965333
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,64,4,0,0.2076575994491577
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,64,8,0,0.12548320293426513
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,64,16,0,0.07836480140686035
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,64,32,0,0.05628799796104431
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,256,64,64,0,0.04564320147037506
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,64,1,0,0.8564895629882813
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,64,2,0,0.4569375991821289
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,64,4,0,0.2552783966064453
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,64,8,0,0.1530959963798523
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,64,16,0,0.09891679883003235
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,256,64,64,0,0.05974559783935547
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,64,32,0,0.07213280200958253
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,128,1,0,1.141004753112793
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,128,2,0,0.5992047786712646
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,128,8,0,0.19925600290298462
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,128,4,0,0.3319808006286621
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,128,16,0,0.12775039672851562
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,128,32,0,0.09262719750404358
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,256,128,64,0,0.07430239915847778
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,128,1,0,1.4289711952209472
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,128,2,0,0.7569039821624756
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,128,4,0,0.41947040557861326
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,128,8,0,0.25024640560150146
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,128,16,0,0.16177279949188234
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,128,32,0,0.1180832028388977
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,256,128,64,0,0.09461280107498168
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,256,1,0,2.148124885559082
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,256,2,0,1.1304207801818849
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,256,4,0,0.6259168148040771
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,256,8,0,0.36676480770111086
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,256,16,0,0.23623039722442626
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,1,256,256,64,0,0.13490719795227052
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,256,32,0,0.17068320512771606
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,256,1,0,2.6491216659545898
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,256,2,0,1.3915696144104004
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,256,4,0,0.765558385848999
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,256,8,0,0.45349440574645994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,256,16,0,0.2936111927032471
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,256,32,0,0.21566240787506102
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,1,256,256,64,0,0.17159520387649535
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,1,1,0,0.014974400401115417
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,1,2,0,0.014734399318695069
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,1,4,0,0.01478559970855713
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,1,8,0,0.014593599736690522
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,1,16,0,0.014732800424098969
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,1,32,0,0.012998400628566742
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,1,64,0,0.012880000472068786
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,1,1,0,0.02250719964504242
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,1,2,0,0.0209647998213768
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,1,4,0,0.02115679979324341
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,1,8,0,0.02114560008049011
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,1,16,0,0.022716799378395082
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,1,32,0,0.020927999913692475
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,1,64,0,0.019006399810314177
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,16,1,0,0.015475200116634369
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,16,2,0,0.016808000206947327
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,16,4,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,16,8,0,0.014791999757289887
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,16,16,0,0.014688000082969666
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,16,32,0,0.014632000029087067
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,16,64,0,0.014683200418949128
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,16,1,0,0.023076799511909486
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,16,2,0,0.022987200319766997
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,16,4,0,0.022968000173568724
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,16,8,0,0.02287199944257736
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,16,16,0,0.020838400721549986
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,16,32,0,0.021054400503635405
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,16,64,0,0.020883199572563172
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,32,1,0,0.01690399944782257
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,32,2,0,0.014745600521564484
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,32,4,0,0.016227200627326965
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,32,8,0,0.014686399698257446
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,32,16,0,0.014726400375366211
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,32,32,0,0.014878399670124054
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,32,64,0,0.014873600006103516
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,32,1,0,0.025094398856163026
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,32,2,0,0.022950400412082673
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,32,4,0,0.021091200411319733
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,32,8,0,0.021073600649833678
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,32,32,0,0.02099519968032837
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,32,16,0,0.021055999398231506
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,32,64,0,0.020795199275016784
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,64,1,0,0.01876319944858551
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,64,2,0,0.01685120016336441
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,64,8,0,0.014881600439548493
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,64,4,0,0.014788800477981567
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,64,16,0,0.015595200657844543
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,64,32,0,0.014945599436759948
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,64,64,0,0.014787200093269347
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,64,1,0,0.027161601185798644
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,64,2,0,0.02496960014104843
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,64,4,0,0.023051199316978455
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,64,8,0,0.020945599675178526
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,64,16,0,0.02294880002737045
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,64,32,0,0.022868800163269042
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,64,64,0,0.02096800059080124
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,128,1,0,0.020899200439453126
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,128,2,0,0.018943999707698823
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,128,4,0,0.01698080003261566
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,128,16,0,0.016864000260829924
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,128,8,0,0.016817599534988403
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,128,32,0,0.016731199622154237
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,128,64,0,0.015003199875354766
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,128,2,0,0.0261680006980896
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,128,1,0,0.03115360140800476
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,128,4,0,0.024775999784469604
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,128,8,0,0.023099200427532197
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,128,16,0,0.023046399652957916
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,128,32,0,0.023025600612163542
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,128,64,0,0.022996799647808076
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,256,1,0,0.03116639852523804
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,256,2,0,0.025051200389862062
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,256,4,0,0.021007999777793884
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,256,8,0,0.019118399918079378
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,256,16,0,0.018934400379657747
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,256,32,0,0.01886879950761795
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,256,64,0,0.01704320013523102
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,256,1,0,0.03943040072917938
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,256,2,0,0.03317919969558716
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,256,4,0,0.02905920147895813
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,256,8,0,0.027020800113677978
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,256,16,0,0.027131199836730957
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,256,32,0,0.02510559856891632
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,256,64,0,0.025099200010299683
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,512,1,0,0.05372959971427917
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,512,2,0,0.033220800757408145
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,512,4,0,0.029014399647712706
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,512,8,0,0.023665599524974823
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,512,16,0,0.02298399955034256
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,512,32,0,0.022891199588775633
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,512,64,0,0.02281759977340698
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,512,1,0,0.06606400012969971
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,512,2,0,0.04349279999732971
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,512,4,0,0.03542880117893219
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,512,8,0,0.03113119900226593
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,512,16,0,0.029292801022529603
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,512,32,0,0.027166399359703063
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,512,64,0,0.027011200785636902
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,1024,1,0,0.10491199493408203
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,1024,2,0,0.06195359826087952
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,1024,4,0,0.03943679928779602
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,1024,8,0,0.03516480028629303
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,1024,16,0,0.029145601391792297
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,1024,32,0,0.029020801186561584
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,1024,64,0,0.02895520031452179
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,1024,1,0,0.12361760139465332
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,1024,2,0,0.0721776008605957
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,1024,4,0,0.048635199666023254
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,1024,8,0,0.041388800740242
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,1024,16,0,0.03528960049152374
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,1024,32,0,0.03522399961948395
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,1024,64,0,0.03336000144481659
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,1536,1,0,0.16324160099029542
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,1536,2,0,0.09667199850082397
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,1536,4,0,0.05982720255851746
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,1536,8,0,0.04367839992046356
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,1536,16,0,0.03739520013332367
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,1536,32,0,0.035438400506973264
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,1536,64,0,0.03517920076847077
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,1536,1,0,0.18610719442367554
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,1536,2,0,0.10704480409622193
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,1536,4,0,0.06783679723739625
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,1536,8,0,0.04951359927654266
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,1536,16,0,0.04344319999217987
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,1536,32,0,0.039345601201057435
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,1536,64,0,0.03945119976997376
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,2048,1,0,0.23450880050659179
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,2048,2,0,0.13415679931640626
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,2048,4,0,0.08068959712982178
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,2048,8,0,0.0537663996219635
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,2048,16,0,0.045582398772239685
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,2048,32,0,0.043486401438713074
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,2048,64,0,0.04349119961261749
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,2048,1,0,0.2525552034378052
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,2048,2,0,0.1455024003982544
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,2048,4,0,0.0884880006313324
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,2048,8,0,0.05780479907989502
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,2048,16,0,0.05158079862594604
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,2048,32,0,0.04556800127029419
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,2048,64,0,0.045552000403404236
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,3072,1,0,0.40227999687194826
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,3072,2,0,0.22189440727233886
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,3072,4,0,0.12953280210494994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,3072,8,0,0.0826640009880066
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,3072,16,0,0.06202080249786377
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,3072,32,0,0.05788480043411255
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,3072,64,0,0.05570719838142395
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,3072,1,0,0.4124879837036133
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,3072,2,0,0.22755680084228516
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,3072,4,0,0.1361232042312622
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,3072,8,0,0.08838880062103271
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,3072,16,0,0.06595360040664673
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,3072,32,0,0.05777599811553955
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,3072,64,0,0.055067199468612674
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,4096,1,0,0.6183199882507324
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,4096,2,0,0.33880159854888914
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,4096,4,0,0.18885279893875123
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,4096,8,0,0.1174239993095398
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,4096,16,0,0.07816479802131653
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,4096,32,0,0.07229599952697754
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,4096,64,0,0.0700543999671936
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,4096,1,0,0.5967552185058593
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,4096,2,0,0.3264559984207153
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,4096,4,0,0.18892799615859984
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,4096,8,0,0.11772799491882324
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,4096,16,0,0.0766048014163971
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,4096,32,0,0.07025439739227295
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,4096,64,0,0.06594240069389343
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,6144,1,0,1.159513568878174
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,6144,2,0,0.622372817993164
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,6144,4,0,0.3482383966445923
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,6144,8,0,0.2008944034576416
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,6144,16,0,0.12773599624633789
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,6144,32,0,0.10283199548721314
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,6144,64,0,0.09670079946517944
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,6144,1,0,1.051155185699463
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,6144,4,0,0.3160304069519043
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,6144,2,0,0.5619200229644775
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,6144,8,0,0.18919520378112792
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,6144,16,0,0.12155040502548217
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,6144,32,0,0.09461439847946167
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,6144,64,0,0.08848000168800355
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,8192,1,0,1.9529727935791015
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,8192,8,0,0.3040208101272583
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,8192,4,0,0.5409103870391846
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,8192,2,0,0.9794879913330078
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,8192,16,0,0.19201600551605225
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,8192,32,0,0.13154720067977904
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,8192,64,0,0.1251904010772705
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,8192,1,0,1.614771270751953
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,8192,2,0,0.8595919609069824
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,8192,8,0,0.2729199886322021
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,8192,4,0,0.46926078796386717
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,8192,16,0,0.17262879610061646
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,8192,32,0,0.11956640481948852
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,8192,64,0,0.11116479635238648
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,10240,4,0,0.793287992477417
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,10240,2,0,1.4211968421936034
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,10240,1,0,2.737180709838867
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,10240,8,0,0.4350399971008301
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,10240,16,0,0.2682703971862793
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,10240,32,0,0.1632591962814331
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,10240,64,0,0.15276000499725342
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,10240,1,0,2.3009904861450194
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,10240,4,0,0.6516335964202881
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,10240,2,0,1.209614372253418
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,10240,8,0,0.36976640224456786
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,10240,16,0,0.2321631908416748
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,10240,64,0,0.13178240060806273
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,10240,32,0,0.14985280036926268
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,12288,1,0,3.8447120666503904
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,12288,2,0,1.9702592849731446
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,12288,4,0,1.073025608062744
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,12288,8,0,0.5748640060424804
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,12288,16,0,0.3416143894195557
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,12288,32,0,0.21762399673461913
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,12288,64,0,0.18071839809417725
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,12288,1,0,3.080803108215332
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,12288,2,0,1.5993311882019043
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,12288,8,0,0.48420162200927735
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,12288,4,0,0.8616720199584961
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,12288,16,0,0.2910576105117798
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,12288,32,0,0.1940176010131836
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,12288,64,0,0.1542415976524353
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,16384,1,0,6.791657257080078
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,16384,8,0,0.9627327919006348
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,16384,4,0,1.7062959671020508
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,16384,2,0,3.282747268676758
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,16384,16,0,0.5419583797454834
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,16384,32,0,0.34165918827056885
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,16384,64,0,0.23595359325408935
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,16384,1,0,5.047758483886719
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,16384,8,0,0.7527616024017334
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,16384,2,0,2.5825952529907226
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,16384,4,0,1.3763456344604492
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,16384,16,0,0.4390895843505859
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,16384,32,0,0.2851696014404297
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,16384,64,0,0.19910240173339844
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,1,32768,8,0,3.2484367370605467
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,1,32768,4,0,6.3016304016113285
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,1,32768,2,0,12.873583984375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,1,32768,16,0,1.7291488647460938
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,1,32768,32,0,0.9867216110229492
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,1,32768,1,0,26.628515625
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,1,32768,64,0,0.6183856010437012
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,1,32768,8,0,2.3533023834228515
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,1,32768,4,0,4.627534484863281
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,1,32768,2,0,9.025889587402343
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,1,32768,1,0,18.05800018310547
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,1,32768,32,0,0.7655776023864747
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,1,32768,16,0,1.3014960289001465
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,1,32768,64,0,0.49581117630004884
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,1,1,0,0.020868800580501556
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,1,2,0,0.014827199280261993
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,1,4,0,0.014788800477981567
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,1,8,0,0.01480959951877594
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,1,16,0,0.012824000418186187
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,1,32,0,0.014548799395561219
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,1,64,0,0.01292639970779419
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,1,1,0,0.026364800333976746
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,1,2,0,0.022838400304317476
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,1,4,0,0.020827199518680572
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,1,8,0,0.020895999670028687
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,1,16,0,0.020828799903392793
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,1,32,0,0.020979200303554536
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,1,64,0,0.020942400395870208
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,16,1,0,0.01890240013599396
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,16,2,0,0.014913600683212281
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,16,4,0,0.016612799465656282
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,16,8,0,0.014699199795722961
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,16,16,0,0.01465280055999756
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,16,32,0,0.014393599331378936
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,16,64,0,0.014705599844455719
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,16,1,0,0.027081599831581114
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,16,2,0,0.022950400412082673
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,16,4,0,0.022870400547981264
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,16,8,0,0.02096959948539734
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,16,16,0,0.022838400304317476
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,16,32,0,0.020923200249671935
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,32,1,0,0.020953600108623505
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,32,2,0,0.016704000532627106
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,32,4,0,0.016672000288963318
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,16,64,0,0.02088479995727539
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,32,8,0,0.015872000157833098
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,32,16,0,0.014753599464893342
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,32,32,0,0.014758400619029999
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,32,64,0,0.014843200147151948
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,32,1,0,0.029246398806571962
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,32,2,0,0.025073599815368653
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,32,4,0,0.02094080001115799
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,32,8,0,0.022977599501609804
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,32,16,0,0.0228752002120018
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,32,32,0,0.02285120040178299
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,32,64,0,0.021160000562667848
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,64,1,0,0.02502079904079437
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,64,2,0,0.01876160055398941
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,64,4,0,0.01684959977865219
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,64,8,0,0.01672320067882538
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,64,16,0,0.01480959951877594
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,64,32,0,0.014886400103569031
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,64,64,0,0.014948800206184387
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,64,1,0,0.03328959941864014
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,64,2,0,0.02707200050354004
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,64,4,0,0.024977600574493407
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,64,8,0,0.023070399463176728
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,64,16,0,0.02295520007610321
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,64,32,0,0.02290239930152893
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,64,64,0,0.02292799949645996
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,128,1,0,0.031799998879432675
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,128,2,0,0.02099040001630783
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,128,4,0,0.018812799453735353
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,128,8,0,0.016921600699424742
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,128,16,0,0.016883200407028197
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,128,32,0,0.01488959938287735
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,128,64,0,0.014886400103569031
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,128,1,0,0.041503998637199405
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,128,2,0,0.031240001320838928
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,128,4,0,0.027246400713920593
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,128,8,0,0.023095999658107758
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,128,16,0,0.02306240051984787
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,128,32,0,0.023104000091552734
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,128,64,0,0.023004800081253052
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,256,1,0,0.047513601183891294
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,256,2,0,0.031195199489593504
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,256,4,0,0.024990400671958922
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,256,8,0,0.02085919976234436
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,256,16,0,0.020796799659729005
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,256,32,0,0.020947200059890748
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,256,64,0,0.018803200125694274
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,256,1,0,0.06188639998435974
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,256,2,0,0.04125120043754578
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,256,4,0,0.03331040143966675
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,256,8,0,0.029164800047874452
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,256,32,0,0.024953599274158477
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,256,16,0,0.02701919972896576
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,256,64,0,0.02497600018978119
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,512,1,0,0.09228479862213135
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,512,2,0,0.05395680069923401
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,512,4,0,0.035183998942375186
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,512,8,0,0.029102399945259094
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,512,16,0,0.024900799989700316
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,512,32,0,0.024542400240898134
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,512,64,0,0.02298080027103424
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,512,1,0,0.1129520058631897
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,512,2,0,0.06581599712371826
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,512,4,0,0.043558400869369504
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,512,8,0,0.03526880145072937
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,512,16,0,0.031001600623130798
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,512,32,0,0.031116798520088196
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,512,64,0,0.029120001196861266
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,1024,1,0,0.1852880001068115
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,1024,2,0,0.10598239898681641
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,1024,4,0,0.06292799711227418
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,1024,8,0,0.03938080072402954
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,1024,16,0,0.035334399342536925
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,1024,32,0,0.03129920065402984
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,1024,64,0,0.031086400151252747
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,1024,1,0,0.21842238903045655
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,1024,2,0,0.12479840517044068
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,1024,4,0,0.0774511992931366
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,1024,8,0,0.05008159875869751
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,1024,16,0,0.04344959855079651
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,1024,32,0,0.03727999925613403
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,1024,64,0,0.03537600040435791
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,1536,1,0,0.30065920352935793
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,1536,2,0,0.16628960371017457
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,1536,4,0,0.0994704008102417
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,1536,8,0,0.06394720077514648
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,1536,16,0,0.04551199972629547
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,1536,32,0,0.03964000046253204
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,1536,64,0,0.039268800616264345
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,1536,1,0,0.3371135950088501
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,1536,2,0,0.18721120357513427
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,1536,4,0,0.10983200073242187
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,1536,8,0,0.07020480036735535
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,1536,16,0,0.05165759921073913
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,1536,32,0,0.043547201156616214
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,1536,64,0,0.041684800386428834
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,2048,1,0,0.441161584854126
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,2048,2,0,0.23639678955078125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,2048,4,0,0.13599679470062256
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,2048,8,0,0.08439199924468994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,2048,16,0,0.0557856023311615
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,2048,32,0,0.04959200024604797
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,2048,64,0,0.04745439887046814
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,2048,1,0,0.4695631980895996
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,2048,2,0,0.25772800445556643
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,2048,4,0,0.14780960083007813
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,2048,8,0,0.09238240122795105
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,2048,16,0,0.05988640189170837
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,2048,32,0,0.0557856023311615
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,2048,64,0,0.04962239861488342
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,3072,1,0,0.7705167770385742
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,3072,2,0,0.406492805480957
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,3072,4,0,0.23079359531402588
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,3072,8,0,0.13343039751052857
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,3072,16,0,0.08641279935836792
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,3072,32,0,0.06605759859085084
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,3072,64,0,0.06172320246696472
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,3072,1,0,0.7739568233489991
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,3072,2,0,0.41638717651367185
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,3072,4,0,0.2337968111038208
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,3072,8,0,0.13984160423278807
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,3072,16,0,0.09269599914550782
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,3072,32,0,0.07006239891052246
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,3072,64,0,0.06413440108299255
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,4096,1,0,1.216766357421875
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,4096,2,0,0.619326400756836
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,4096,4,0,0.34249279499053953
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,4096,8,0,0.19617279767990112
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,4096,16,0,0.12369279861450196
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,4096,32,0,0.08443359732627868
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,4096,64,0,0.078302401304245
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,4096,1,0,1.135919952392578
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,4096,2,0,0.6047520160675048
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,4096,4,0,0.33316640853881835
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,4096,8,0,0.19309920072555542
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,4096,16,0,0.12532639503479004
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,4096,32,0,0.08457599878311158
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,4096,64,0,0.07632319927215576
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,6144,1,0,2.2873727798461916
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,6144,2,0,1.1646495819091798
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,6144,4,0,0.6450784206390381
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,6144,16,0,0.21220800876617432
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,6144,8,0,0.35348799228668215
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,6144,64,0,0.11107200384140015
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,6144,32,0,0.13696960210800171
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,6144,2,0,1.061582374572754
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,6144,1,0,2.036342430114746
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,6144,4,0,0.5772016048431396
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,6144,8,0,0.32567520141601564
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,6144,16,0,0.19708800315856934
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,6144,32,0,0.13228319883346557
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,6144,64,0,0.10492160320281982
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,8192,1,0,3.6653182983398436
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,8192,4,0,1.000715160369873
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,8192,2,0,1.873641586303711
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,8192,8,0,0.5605072021484375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,8192,16,0,0.31528799533843993
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,8192,32,0,0.20470879077911378
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,8192,64,0,0.14165760278701783
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,8192,1,0,3.1834463119506835
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,8192,2,0,1.6319839477539062
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,8192,8,0,0.48265438079833983
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,8192,4,0,0.8707056045532227
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,8192,16,0,0.28813920021057127
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,8192,32,0,0.18935359716415406
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,8192,64,0,0.13369920253753662
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,10240,1,0,5.595870590209961
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,10240,2,0,2.855633544921875
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,10240,8,0,0.7813087940216065
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,10240,4,0,1.4346863746643066
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,10240,16,0,0.4530479907989502
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,10240,32,0,0.28362560272216797
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,10240,64,0,0.17657439708709716
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,10240,1,0,4.503726577758789
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,10240,2,0,2.3194671630859376
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,10240,4,0,1.2304512023925782
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,10240,8,0,0.671011209487915
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,10240,16,0,0.38806400299072263
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,10240,32,0,0.24995839595794678
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,10240,64,0,0.16857279539108277
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,12288,4,0,2.0427791595458986
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,12288,2,0,3.8627872467041016
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,12288,1,0,7.803396606445313
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,12288,8,0,1.0514656066894532
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,12288,16,0,0.6106287956237793
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,12288,32,0,0.3650768041610718
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,12288,64,0,0.2337519884109497
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,12288,1,0,6.123761749267578
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,12288,8,0,0.8825807571411133
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,12288,4,0,1.6274608612060546
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,12288,2,0,3.1438224792480467
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,12288,32,0,0.31072158813476564
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,12288,16,0,0.5034192085266114
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,12288,64,0,0.21504480838775636
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,16384,8,0,1.7615856170654296
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,16384,4,0,3.3502864837646484
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,16384,2,0,6.729374694824219
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,16384,1,0,14.181039428710937
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,16384,16,0,0.962923240661621
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,16384,32,0,0.5602672100067139
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,16384,64,0,0.3604752063751221
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,16384,4,0,2.5999584197998047
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,16384,8,0,1.3862048149108888
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,16384,2,0,5.111336135864258
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,16384,1,0,10.280899047851562
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,16384,32,0,0.46679039001464845
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,16384,16,0,0.7814879894256592
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,16384,64,0,0.31227359771728513
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,2,32768,8,0,6.408080291748047
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,2,32768,4,0,12.850709533691406
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,2,32768,16,0,3.2623119354248047
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,2,32768,32,0,1.7462160110473632
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,2,32768,64,0,1.0402015686035155
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,2,32768,2,0,26.067477416992187
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,2,32768,4,0,9.175484466552735
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,2,32768,1,0,54.10630493164062
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,2,32768,2,0,18.158230590820313
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,2,32768,8,0,4.65684814453125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,2,32768,16,0,2.444876861572266
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,2,32768,32,0,1.365552043914795
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,2,32768,64,0,0.8215248107910156
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,1,1,0,0.02784000039100647
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,2,32768,1,0,36.02559814453125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,1,2,0,0.020934399962425233
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,1,4,0,0.016865600645542145
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,1,8,0,0.01677280068397522
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,1,16,0,0.014841599762439728
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,1,32,0,0.014697599411010741
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,1,64,0,0.014868800342082978
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,1,1,0,0.03320479989051819
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,1,2,0,0.026494398713111877
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,1,4,0,0.023003199696540834
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,1,8,0,0.02288320064544678
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,1,16,0,0.023035199940204622
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,1,32,0,0.021166400611400606
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,1,64,0,0.020873600244522096
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,16,1,0,0.025947201251983642
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,16,2,0,0.01890240013599396
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,16,4,0,0.01483519971370697
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,16,8,0,0.014654399454593658
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,16,16,0,0.014895999431610107
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,16,32,0,0.014764800667762756
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,16,64,0,0.014771200716495514
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,16,1,0,0.03326399922370911
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,16,2,0,0.026902401447296144
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,16,4,0,0.02486239969730377
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,16,8,0,0.02287199944257736
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,16,16,0,0.020985600352287293
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,16,32,0,0.0228752002120018
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,16,64,0,0.023100799322128295
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,32,1,0,0.029355201125144958
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,32,2,0,0.020742399990558623
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,32,4,0,0.017105600237846373
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,32,8,0,0.014683200418949128
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,32,16,0,0.014900800585746766
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,32,32,0,0.014638400077819825
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,32,64,0,0.01472959965467453
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,32,1,0,0.03901599943637848
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,32,4,0,0.025016000866889952
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,32,2,0,0.02707839906215668
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,32,8,0,0.023012800514698027
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,32,16,0,0.022951999306678773
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,32,32,0,0.02295680046081543
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,32,64,0,0.02099200040102005
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,64,1,0,0.037222400307655334
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,64,2,0,0.02502720057964325
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,64,4,0,0.018905599415302277
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,64,8,0,0.016190400719642638
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,64,16,0,0.016678400337696075
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,64,32,0,0.016910399496555328
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,64,64,0,0.014734399318695069
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,64,4,0,0.027132800221443175
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,64,1,0,0.04765279889106751
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,64,2,0,0.03323839902877808
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,64,8,0,0.023024000227451324
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,64,16,0,0.023129600286483764
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,64,32,0,0.022894400358200073
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,64,64,0,0.023235200345516203
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,128,1,0,0.04959680140018463
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,128,2,0,0.033185601234436035
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,128,4,0,0.021007999777793884
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,128,8,0,0.019662399590015412
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,128,16,0,0.01874080002307892
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,128,32,0,0.016040000319480895
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,128,64,0,0.014830400049686433
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,128,1,0,0.0641152024269104
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,128,2,0,0.04288800060749054
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,128,4,0,0.03152480125427246
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,128,8,0,0.02704159915447235
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,128,16,0,0.025135999917984007
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,128,32,0,0.024780799448490144
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,128,64,0,0.023099200427532197
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,256,1,0,0.08643199801445008
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,256,2,0,0.04760960042476654
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,256,4,0,0.031188800930976868
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,256,8,0,0.024993599951267244
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,256,16,0,0.022995199263095855
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,256,32,0,0.019495999813079833
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,256,64,0,0.018987199664115904
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,256,1,0,0.10621119737625122
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,256,2,0,0.06259040236473083
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,256,4,0,0.04129279851913452
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,256,8,0,0.03525919914245605
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,256,16,0,0.029096001386642457
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,256,32,0,0.0271232008934021
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,256,64,0,0.0270687997341156
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,512,1,0,0.16293760538101196
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,512,2,0,0.09271360039710999
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,512,4,0,0.0541055977344513
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,512,8,0,0.03487519919872284
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,512,16,0,0.029142400622367857
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,512,32,0,0.02697120010852814
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,512,64,0,0.024932800233364104
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,512,1,0,0.2000607967376709
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,512,2,0,0.11358400583267211
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,512,4,0,0.06674720048904419
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,512,8,0,0.045638400316238406
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,512,16,0,0.03732160031795502
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,512,32,0,0.033585599064826964
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,512,64,0,0.03121120035648346
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,1024,1,0,0.3455647945404053
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,1024,2,0,0.1892143964767456
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,1024,4,0,0.10953760147094727
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,1024,8,0,0.06591839790344238
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,1024,16,0,0.043398401141166686
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,1024,32,0,0.039166399836540224
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,1024,64,0,0.03537600040435791
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,1024,1,0,0.4039055824279785
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,1024,2,0,0.22124478816986085
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,1024,4,0,0.12740960121154785
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,1024,8,0,0.07837120294570923
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,1024,16,0,0.05371999740600586
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,1024,32,0,0.04536640048027039
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,1024,64,0,0.03933280110359192
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,1536,1,0,0.576473617553711
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,1536,2,0,0.3035072088241577
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,1536,4,0,0.17095199823379517
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,1536,8,0,0.10272639989852905
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,1536,16,0,0.0660207986831665
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,1536,32,0,0.049491199851036075
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,1536,64,0,0.045505601167678836
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,1536,1,0,0.6354015827178955
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,1536,2,0,0.34351840019226076
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,1536,4,0,0.1920896053314209
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,1536,8,0,0.11558239459991455
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,1536,16,0,0.07503679990768433
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,1536,32,0,0.05770559906959534
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,1536,64,0,0.04968160092830658
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,2048,1,0,0.8321824073791504
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,2048,2,0,0.44203839302062986
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,2048,4,0,0.24634718894958496
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,2048,8,0,0.14156160354614258
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,2048,16,0,0.08860160112380981
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,2048,32,0,0.0597536027431488
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,2048,64,0,0.05571200251579285
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,2048,2,0,0.47541441917419436
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,2048,1,0,0.8932720184326172
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,2048,4,0,0.26382400989532473
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,2048,8,0,0.15375519990921022
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,2048,16,0,0.09918879866600036
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,2048,64,0,0.06174399852752686
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,2048,32,0,0.06808000206947326
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,3072,1,0,1.531926441192627
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,3072,4,0,0.41922879219055176
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,3072,2,0,0.775271987915039
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,3072,8,0,0.23633439540863038
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,3072,16,0,0.1437664031982422
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,3072,32,0,0.09408000111579895
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,3072,64,0,0.07611680030822754
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,3072,1,0,1.4979215621948243
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,3072,2,0,0.7865263938903808
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,3072,4,0,0.42641282081604004
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,3072,8,0,0.24293279647827148
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,3072,16,0,0.14984480142593384
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,3072,32,0,0.10083359479904175
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,3072,64,0,0.07839840054512023
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,4096,1,0,2.3020256042480467
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,4096,2,0,1.1856464385986327
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,4096,4,0,0.6497888088226318
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,4096,8,0,0.35469439029693606
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,4096,16,0,0.20876479148864746
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,4096,32,0,0.13767679929733276
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,4096,64,0,0.09474080204963684
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,4096,1,0,2.217507171630859
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,4096,2,0,1.1514047622680663
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,4096,4,0,0.6190512180328369
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,4096,8,0,0.34894719123840334
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,4096,16,0,0.20679359436035155
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,4096,32,0,0.14011039733886718
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,4096,64,0,0.09922879934310913
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,6144,1,0,4.584241485595703
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,6144,2,0,2.273646354675293
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,6144,8,0,0.6591743946075439
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,6144,4,0,1.181993579864502
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,6144,16,0,0.36998400688171384
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,6144,32,0,0.22772159576416015
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,6144,64,0,0.1542847990989685
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,6144,1,0,3.999803161621094
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,6144,2,0,2.059878349304199
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,6144,8,0,0.5988383769989014
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,6144,4,0,1.082652759552002
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,6144,16,0,0.34714879989624026
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,6144,32,0,0.21754240989685059
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,6144,64,0,0.15624639987945557
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,8192,1,0,7.544292449951172
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,8192,2,0,3.6795120239257812
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,8192,4,0,1.929911994934082
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,8192,8,0,1.0486960411071777
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,8192,16,0,0.5697984218597412
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,8192,64,0,0.22685599327087402
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,8192,32,0,0.33929920196533203
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,8192,1,0,6.321622467041015
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,8192,8,0,0.8960831642150879
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,8192,4,0,1.6606895446777343
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,8192,2,0,3.178107261657715
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,8192,16,0,0.5147151947021484
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,8192,32,0,0.31321280002593993
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,8192,64,0,0.21583681106567382
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,10240,8,0,1.4923999786376954
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,10240,4,0,2.7799327850341795
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,10240,2,0,5.889648056030273
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,10240,1,0,11.378015899658203
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,10240,16,0,0.8182784080505371
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,10240,32,0,0.4757023811340332
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,10240,64,0,0.3117759943008423
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,10240,8,0,1.2642000198364258
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,10240,4,0,2.3471200942993162
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,10240,1,0,9.040801239013671
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,10240,2,0,4.553499221801758
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,10240,16,0,0.7103007793426513
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,10240,32,0,0.42508320808410643
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,10240,64,0,0.2832256078720093
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,12288,8,0,2.0020944595336916
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,12288,4,0,3.8919567108154296
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,12288,2,0,8.265475463867187
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,12288,1,0,16.12529754638672
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,12288,16,0,1.090113639831543
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,12288,32,0,0.6288559913635254
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,12288,64,0,0.4035359859466553
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,12288,4,0,3.149588775634766
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,12288,8,0,1.6824783325195312
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,12288,2,0,6.150215911865234
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,12288,1,0,12.420238494873047
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,12288,16,0,0.9212224006652832
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,12288,32,0,0.5509503841400146
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,12288,64,0,0.354587197303772
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,4,16384,8,0,3.354140853881836
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,4,16384,4,0,7.025414276123047
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,4,16384,2,0,13.788436889648438
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,4,16384,16,0,1.766476821899414
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,4,16384,32,0,1.0280960083007813
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,4,16384,64,0,0.6079984188079834
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,4,16384,1,0,28.106951904296874
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,4,16384,8,0,2.6690496444702148
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,4,16384,2,0,10.385363006591797
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,4,16384,4,0,5.151108932495117
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,4,16384,1,0,19.99754180908203
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,4,16384,16,0,1.447651195526123
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,4,16384,64,0,0.5254687786102294
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,4,16384,32,0,0.8419039726257325
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,1,1,0,0.037118399143218996
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,1,2,0,0.025968000292778015
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,1,4,0,0.019351999461650848
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,1,8,0,0.016518400609493257
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,1,16,0,0.016756799817085267
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,1,32,0,0.014718399941921234
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,1,64,0,0.014572800695896148
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,1,1,0,0.037390398979187014
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,1,2,0,0.033081600069999696
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,1,4,0,0.025094398856163026
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,1,8,0,0.022720000147819518
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,1,16,0,0.022912000119686127
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,1,32,0,0.02285760045051575
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,1,64,0,0.020905600488185884
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,16,1,0,0.037283200025558474
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,16,2,0,0.02508159875869751
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,16,4,0,0.02091200053691864
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,16,8,0,0.016774399578571318
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,16,16,0,0.01616320013999939
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,16,32,0,0.014713600277900696
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,16,64,0,0.014865599572658539
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,16,1,0,0.04536159932613373
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,16,2,0,0.033232000470161435
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,16,4,0,0.026969599723815917
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,16,8,0,0.023000000417232512
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,16,16,0,0.022868800163269042
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,16,32,0,0.022843199968338012
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,16,64,0,0.022944000363349915
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,32,1,0,0.04350399971008301
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,32,2,0,0.03102560043334961
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,32,4,0,0.021823999285697938
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,32,8,0,0.01682399958372116
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,32,16,0,0.014745600521564484
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,32,32,0,0.014881600439548493
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,32,64,0,0.014881600439548493
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,32,2,0,0.03925119936466217
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,32,1,0,0.05370240211486817
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,32,4,0,0.02898559868335724
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,32,8,0,0.022995199263095855
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,32,16,0,0.022896000742912294
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,32,32,0,0.0229312002658844
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,64,1,0,0.05755360126495361
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,32,64,0,0.021817600727081297
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,64,4,0,0.02481440007686615
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,64,8,0,0.018935999274253844
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,64,2,0,0.03720319867134094
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,64,16,0,0.016737599670886994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,64,32,0,0.014902399480342865
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,64,64,0,0.01674720048904419
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,64,1,0,0.07179840207099915
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,64,2,0,0.04754399955272674
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,64,4,0,0.03518719971179962
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,64,8,0,0.027031999826431275
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,64,16,0,0.022918400168418885
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,64,32,0,0.02292959988117218
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,64,64,0,0.02301120012998581
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,128,1,0,0.08656960129737853
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,128,2,0,0.050727999210357665
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,128,4,0,0.031206399202346802
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,128,8,0,0.02313600033521652
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,128,16,0,0.018955199420452117
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,128,32,0,0.018755200505256652
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,128,64,0,0.016791999340057373
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,128,1,0,0.1093824028968811
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,128,2,0,0.0641871988773346
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,128,4,0,0.04158560037612915
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,128,8,0,0.03128800094127655
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,128,16,0,0.027161601185798644
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,128,32,0,0.02691200077533722
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,128,64,0,0.02296479940414429
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,256,1,0,0.15208640098571777
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,256,2,0,0.08709440231323243
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,256,4,0,0.050297600030899045
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,256,8,0,0.03314880132675171
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,256,16,0,0.027006399631500245
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,256,32,0,0.02452320009469986
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,256,64,0,0.020983999967575072
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,256,1,0,0.188809597492218
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,256,2,0,0.10907679796218872
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,256,4,0,0.06364799737930298
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,256,8,0,0.04341920018196106
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,256,32,0,0.029216000437736513
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,256,16,0,0.03538880050182343
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,256,64,0,0.02922559976577759
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,512,1,0,0.3019488096237183
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,512,2,0,0.16452640295028687
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,512,8,0,0.059683197736740114
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,512,4,0,0.09494879841804504
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,512,16,0,0.037459200620651244
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,512,32,0,0.03332000076770782
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,512,64,0,0.028992000222206115
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,512,1,0,0.36963839530944825
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,512,2,0,0.20492000579833985
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,512,4,0,0.117249596118927
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,512,8,0,0.07204639911651611
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,512,16,0,0.04742240011692047
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,512,32,0,0.04137440025806427
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,512,64,0,0.035278400778770445
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,1024,1,0,0.661956787109375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,1024,2,0,0.3514431953430176
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,1024,4,0,0.1965023994445801
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,1024,8,0,0.1145583987236023
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,1024,16,0,0.07213600277900696
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,1024,64,0,0.043424001336097716
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,1024,32,0,0.04952319860458374
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,1024,1,0,0.7688560009002685
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,1024,2,0,0.4102176189422607
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,1024,4,0,0.22820000648498534
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,1024,8,0,0.1333840012550354
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,1024,16,0,0.08430879712104797
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,1024,32,0,0.05793439745903015
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,1024,64,0,0.05175039768218994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,1536,1,0,1.1126015663146973
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,1536,2,0,0.5761007785797119
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,1536,4,0,0.3138672113418579
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,1536,8,0,0.1820896029472351
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,1536,16,0,0.11122560501098633
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,1536,32,0,0.07422559857368469
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,1536,64,0,0.05787039995193481
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,1536,1,0,1.2312800407409668
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,1536,2,0,0.6433216094970703
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,1536,4,0,0.3511392116546631
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,1536,8,0,0.20342400074005126
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,1536,32,0,0.08664960265159607
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,1536,16,0,0.12524800300598143
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,1536,64,0,0.06808800101280213
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,2048,1,0,1.670088005065918
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,2048,4,0,0.4561600208282471
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,2048,2,0,0.8442159652709961
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,2048,8,0,0.25497438907623293
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,2048,16,0,0.1529520034790039
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,2048,32,0,0.10131200551986694
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,2048,64,0,0.07208160161972046
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,2048,1,0,1.7425167083740234
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,2048,2,0,0.9101263999938964
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,2048,4,0,0.48893117904663086
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,2048,8,0,0.2778831958770752
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,2048,16,0,0.1686944007873535
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,2048,64,0,0.0819599986076355
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,3072,1,0,2.9270111083984376
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,2048,32,0,0.11534719467163086
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,3072,2,0,1.5056960105895996
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,3072,4,0,0.796449613571167
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,3072,8,0,0.4331552028656006
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,3072,16,0,0.25672159194946287
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,3072,32,0,0.16013760566711427
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,3072,64,0,0.11136480569839477
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,3072,2,0,1.527233600616455
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,3072,1,0,2.9512351989746093
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,3072,4,0,0.8056400299072266
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,3072,8,0,0.4481071949005127
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,3072,16,0,0.2657504081726074
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,3072,32,0,0.1689039945602417
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,3072,64,0,0.12372000217437744
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,4096,1,0,4.648835372924805
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,4096,4,0,1.2091792106628418
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,4096,2,0,2.409008026123047
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,4096,8,0,0.6553088188171386
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,4096,16,0,0.37516160011291505
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,4096,32,0,0.23215999603271484
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,4096,64,0,0.16059520244598388
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,4096,1,0,4.3841087341308596
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,4096,8,0,0.6464816093444824
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,4096,2,0,2.2517776489257812
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,4096,4,0,1.177793598175049
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,4096,16,0,0.37499198913574217
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,4096,32,0,0.23631999492645264
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,4096,64,0,0.16655679941177368
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,6144,1,0,9.070359802246093
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,6144,8,0,1.2371408462524414
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,6144,4,0,2.402668762207031
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,6144,2,0,4.522143936157226
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,6144,32,0,0.4060448169708252
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,6144,16,0,0.6733615875244141
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,6144,64,0,0.2707632064819336
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,6144,1,0,7.938442993164062
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,6144,8,0,1.129030418395996
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,6144,4,0,2.0959552764892577
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,6144,2,0,4.051251220703125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,6144,16,0,0.6342991828918457
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,6144,32,0,0.39061760902404785
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,6144,64,0,0.2590111970901489
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,8,8192,8,0,1.9511247634887696
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,8,8192,4,0,3.9271392822265625
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,8,8192,2,0,7.604164886474609
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,8,8192,1,0,15.560261535644532
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,8,8192,16,0,1.0575504302978516
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,8,8192,32,0,0.6251408100128174
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,8,8192,64,0,0.3900511980056763
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,8,8192,4,0,3.2723758697509764
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,8,8192,8,0,1.7095808029174804
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,8,8192,2,0,6.293494415283203
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,8,8192,1,0,12.541937255859375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,8,8192,16,0,0.9556015968322754
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,8,8192,32,0,0.5678239822387695
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,8,8192,64,0,0.37204320430755616
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,1,1,0,0.0648576021194458
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,1,2,0,0.033164799213409424
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,1,4,0,0.0268528014421463
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,1,8,0,0.022489599883556366
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,1,16,0,0.014927999675273895
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,1,32,0,0.014795200526714325
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,1,64,0,0.014740799367427827
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,1,1,0,0.055720001459121704
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,1,2,0,0.03740800023078918
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,1,4,0,0.03140639960765838
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,1,8,0,0.02521120011806488
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,1,16,0,0.023163199424743652
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,1,32,0,0.020891200006008147
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,1,64,0,0.02102400064468384
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,16,1,0,0.05994719862937927
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,16,2,0,0.037243199348449704
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,16,4,0,0.02513760030269623
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,16,8,0,0.018995200097560883
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,16,16,0,0.01494079977273941
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,16,32,0,0.014776000380516052
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,16,64,0,0.014716799557209014
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,16,1,0,0.07025759816169738
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,16,2,0,0.04557600021362305
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,16,4,0,0.03322719931602478
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,16,8,0,0.026943999528884887
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,16,16,0,0.023068800568580627
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,16,32,0,0.022907200455665588
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,16,64,0,0.022987200319766997
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,32,1,0,0.07411680221557618
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,32,2,0,0.04350399971008301
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,32,4,0,0.03107360005378723
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,32,8,0,0.02091200053691864
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,32,16,0,0.01688160002231598
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,32,32,0,0.016758400201797485
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,32,64,0,0.015001599490642548
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,32,1,0,0.085479998588562
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,32,2,0,0.05332319736480713
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,32,4,0,0.03800959885120392
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,32,8,0,0.02908959984779358
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,32,16,0,0.024940800666809083
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,32,32,0,0.0230335995554924
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,32,64,0,0.023049600422382355
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,64,1,0,0.10503519773483276
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,64,2,0,0.05600960254669189
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,64,4,0,0.037227201461791995
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,64,8,0,0.025009599328041077
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,64,16,0,0.018774400651454925
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,64,32,0,0.01681919991970062
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,64,64,0,0.01693280041217804
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,64,1,0,0.12632639408111573
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,64,2,0,0.07015519738197326
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,64,4,0,0.04763039946556091
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,64,8,0,0.03528960049152374
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,64,16,0,0.02906079888343811
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,64,32,0,0.025043201446533204
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,64,64,0,0.022969600558280946
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,128,1,0,0.15745279788970948
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,128,2,0,0.08843680024147034
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,128,4,0,0.05127360224723816
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,128,8,0,0.033118399977684024
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,128,16,0,0.02299039959907532
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,128,32,0,0.020904000103473663
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,128,64,0,0.0189983993768692
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,128,2,0,0.11163040399551391
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,128,1,0,0.2002415895462036
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,128,4,0,0.06597279906272888
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,128,8,0,0.04547840058803558
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,128,16,0,0.03320800065994263
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,128,64,0,0.026976001262664796
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,256,1,0,0.2869904041290283
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,128,32,0,0.029153600335121155
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,256,2,0,0.15514880418777466
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,256,4,0,0.0903056025505066
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,256,8,0,0.051609599590301515
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,256,16,0,0.035395199060440065
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,256,32,0,0.03107360005378723
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,256,64,0,0.027086400985717775
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,256,1,0,0.3531552076339722
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,256,2,0,0.1924496054649353
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,256,4,0,0.11104960441589355
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,256,8,0,0.06606879830360413
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,256,16,0,0.045607998967170715
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,256,32,0,0.037436801195144656
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,256,64,0,0.03533760011196137
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,512,1,0,0.5760640144348145
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,512,2,0,0.30678400993347166
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,512,4,0,0.17226879596710204
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,512,8,0,0.10228960514068604
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,512,16,0,0.06362720131874085
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,512,32,0,0.04358560144901276
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,512,64,0,0.03733280003070831
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,512,1,0,0.707377576828003
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,512,2,0,0.3773279905319214
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,512,4,0,0.2093424081802368
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,512,8,0,0.12254079580307006
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,512,16,0,0.0762880027294159
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,512,32,0,0.055430400371551516
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,512,64,0,0.04756160080432892
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,1024,1,0,1.3014752388000488
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,1024,2,0,0.6812623977661133
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,1024,4,0,0.36273438930511476
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,1024,8,0,0.2085263967514038
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,1024,16,0,0.12536319494247436
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,1024,32,0,0.08445119857788086
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,1024,64,0,0.0597104012966156
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,1024,1,0,1.5042448043823242
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,1024,2,0,0.782755184173584
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,1024,8,0,0.2418976068496704
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,1024,4,0,0.4247488021850586
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,1024,16,0,0.14695839881896972
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,1024,32,0,0.10071359872817993
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,1024,64,0,0.07422239780426025
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,1536,1,0,2.2233488082885744
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,1536,4,0,0.5989039897918701
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,1536,2,0,1.1232704162597655
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,1536,8,0,0.3308784008026123
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,1536,16,0,0.19843839406967162
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,1536,32,0,0.12738399505615233
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,1536,64,0,0.09214400053024292
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,1536,1,0,2.4156368255615233
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,1536,2,0,1.2523232460021974
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,1536,4,0,0.6648911952972412
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,1536,8,0,0.3730223894119263
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,1536,16,0,0.22401759624481202
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,1536,32,0,0.14634239673614502
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,1536,64,0,0.10876319408416749
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,2048,4,0,0.875220775604248
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,2048,8,0,0.4745952129364014
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,2048,2,0,1.6945600509643555
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,2048,1,0,3.235281753540039
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,2048,16,0,0.2772144079208374
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,2048,32,0,0.1766800045967102
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,2048,64,0,0.12551679611206054
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,2048,1,0,3.4452110290527345
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,2048,8,0,0.5168799877166748
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,2048,4,0,0.9362624168395997
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,2048,2,0,1.7733503341674806
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,2048,16,0,0.3049504041671753
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,2048,32,0,0.19528640508651735
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,2048,64,0,0.1415727972984314
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,3072,1,0,5.93077278137207
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,3072,2,0,2.9737648010253905
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,3072,8,0,0.8256175994873047
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,3072,4,0,1.5790672302246094
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,3072,32,0,0.2895136117935181
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,3072,16,0,0.4691215991973877
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,3072,64,0,0.19890079498291016
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,3072,1,0,5.883891296386719
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,3072,4,0,1.5620911598205567
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,3072,8,0,0.8450480461120605
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,3072,2,0,2.9902320861816407
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,3072,16,0,0.4882016181945801
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,3072,32,0,0.3078495979309082
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,3072,64,0,0.21200640201568605
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,16,4096,1,0,9.281130981445312
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,16,4096,8,0,1.2584336280822754
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,16,4096,4,0,2.4066160202026365
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,16,4096,2,0,4.807571029663086
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,16,4096,32,0,0.41980957984924316
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,16,4096,64,0,0.2824064016342163
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,16,4096,16,0,0.6982880115509034
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,16,4096,8,0,1.233409595489502
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,16,4096,1,0,8.781304168701173
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,16,4096,4,0,2.3052143096923827
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,16,4096,2,0,4.439414215087891
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,16,4096,16,0,0.6969056129455566
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,16,4096,64,0,0.2916352033615112
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,16,4096,32,0,0.4321887969970703
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,32,1,1,0,0.08251519799232483
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,1,2,0,0.05093119740486145
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,1,4,0,0.03112800121307373
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,1,8,0,0.024084800481796266
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,1,16,0,0.01895360052585602
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,1,32,0,0.01679680049419403
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,1,64,0,0.014764800667762756
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,32,1,1,0,0.09015520215034485
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,1,2,0,0.05564000010490418
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,1,4,0,0.03933599889278412
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,1,8,0,0.03107360005378723
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,1,16,0,0.025655999779701233
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,1,32,0,0.020924800634384157
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,1,64,0,0.021028800308704375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,32,16,1,0,0.1028864026069641
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,16,2,0,0.06064000129699707
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,16,4,0,0.037329599261283875
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,16,8,0,0.026998400688171387
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,16,16,0,0.018799999356269838
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,16,32,0,0.016964800655841827
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,16,64,0,0.01674560010433197
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,32,16,1,0,0.117302405834198
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,16,2,0,0.0701471984386444
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,16,4,0,0.04556640088558197
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,16,8,0,0.03324959874153137
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,16,16,0,0.028174400329589844
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,16,32,0,0.023081600666046143
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,16,64,0,0.022969600558280946
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,32,32,1,0,0.13475680351257324
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,32,2,0,0.07422559857368469
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,32,4,0,0.04529919922351837
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,32,8,0,0.031040000915527343
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,32,16,0,0.02260800004005432
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,32,32,0,0.01693280041217804
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,32,64,0,0.016715200245380403
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,32,32,1,0,0.153766405582428
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,32,2,0,0.08661440014839172
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,32,4,0,0.053668802976608275
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,32,8,0,0.037462401390075686
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,32,16,0,0.03115360140800476
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,32,32,0,0.025091201066970825
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,32,64,0,0.02486239969730377
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,32,64,1,0,0.19085919857025146
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,64,2,0,0.1049280047416687
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,64,4,0,0.05987679958343506
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,64,8,0,0.03933919966220856
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,64,16,0,0.0270224004983902
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,64,32,0,0.020769600570201874
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,64,64,0,0.018911999464035035
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,32,64,1,0,0.23199679851531982
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,64,4,0,0.0740447998046875
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,64,8,0,0.047635200619697574
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,64,2,0,0.12799359560012818
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,64,16,0,0.03540320098400116
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,64,32,0,0.02913439869880676
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,64,64,0,0.027211201190948487
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,32,128,1,0,0.2957391977310181
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,128,2,0,0.16191680431365968
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,128,4,0,0.09048640131950378
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,128,8,0,0.05392959713935852
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,128,16,0,0.035329601168632506
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,128,32,0,0.027075201272964478
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,128,64,0,0.02298080027103424
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,32,128,1,0,0.3705440044403076
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,128,2,0,0.2052623987197876
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,128,4,0,0.11456960439682007
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,128,8,0,0.07009279727935791
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,128,16,0,0.04746879935264588
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,128,32,0,0.037264001369476316
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,128,64,0,0.033292800188064575
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,32,256,1,0,0.5515615940093994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,256,2,0,0.2915744066238403
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,256,4,0,0.16083359718322754
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,256,8,0,0.0946016013622284
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,256,16,0,0.05703520178794861
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,256,32,0,0.03940480053424835
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,256,64,0,0.03450559973716736
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,32,256,1,0,0.6723584175109864
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,256,4,0,0.19808319807052613
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,256,2,0,0.35944480895996095
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,256,8,0,0.11832640171051026
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,256,16,0,0.07237759828567505
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,256,32,0,0.05360640287399292
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,256,64,0,0.04549280107021332
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,32,512,1,0,1.1280096054077149
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,512,2,0,0.5926159858703614
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,512,4,0,0.3193552017211914
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,512,8,0,0.18280160427093506
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,512,16,0,0.11289440393447876
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,512,32,0,0.07633919715881347
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,512,64,0,0.0538640022277832
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,32,512,1,0,1.3810463905334474
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,512,2,0,0.7198495864868164
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,512,4,0,0.39045760631561277
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,512,8,0,0.22484800815582276
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,512,16,0,0.13645440340042114
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,512,32,0,0.0927951991558075
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,512,64,0,0.06951839923858642
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,32,1024,1,0,2.576892852783203
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,1024,2,0,1.3187583923339843
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,1024,4,0,0.6946703910827636
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,1024,8,0,0.38969600200653076
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,1024,16,0,0.2297231912612915
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,1024,32,0,0.1492735981941223
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,1024,64,0,0.10684800148010254
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,32,1024,1,0,2.979487991333008
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,1024,8,0,0.4499839782714844
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,1024,4,0,0.8107279777526856
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,1024,2,0,1.531383991241455
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,1024,16,0,0.2700256109237671
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,1024,32,0,0.17492640018463135
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,1024,64,0,0.12708959579467774
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,32,1536,1,0,4.35747184753418
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,1536,2,0,2.2553743362426757
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,1536,4,0,1.1577983856201173
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,1536,8,0,0.6300432205200195
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,1536,16,0,0.36528000831604
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,1536,32,0,0.23256158828735352
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,1536,64,0,0.16265439987182617
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,32,1536,1,0,4.79987678527832
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,1536,4,0,1.2944031715393067
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,1536,2,0,2.4606815338134767
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,1536,8,0,0.7055056095123291
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,1536,16,0,0.4142335891723633
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,1536,32,0,0.2672015905380249
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,1536,64,0,0.18787679672241211
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,32,2048,1,0,6.440806579589844
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,32,2048,8,0,0.9216447830200195
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,32,2048,4,0,1.7381776809692382
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,32,2048,2,0,3.273603057861328
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,32,2048,16,0,0.518943977355957
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,32,2048,32,0,0.32332799434661863
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,32,2048,64,0,0.22514879703521729
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,32,2048,1,0,6.854412841796875
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,32,2048,8,0,0.9928288459777832
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,32,2048,4,0,1.827841567993164
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,32,2048,2,0,3.500806427001953
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,32,2048,16,0,0.5713007926940918
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,32,2048,32,0,0.36113920211791994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,32,2048,64,0,0.2522239923477173
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,64,1,1,0,0.15393279790878295
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,1,4,0,0.04965760111808777
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,1,2,0,0.08431839942932129
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,1,8,0,0.031225600838661195
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,1,16,0,0.022974400222301482
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,1,32,0,0.01961279958486557
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,1,64,0,0.015003199875354766
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,64,1,1,0,0.1581264019012451
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,1,2,0,0.09033920168876648
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,1,4,0,0.055883198976516724
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,1,8,0,0.03731040060520172
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,1,16,0,0.030131199955940248
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,1,32,0,0.024897600710391998
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,1,64,0,0.022937600314617158
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,64,16,1,0,0.19908640384674073
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,16,4,0,0.06077119708061218
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,16,2,0,0.10450400114059448
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,16,8,0,0.0373744010925293
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,16,16,0,0.027113598585128785
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,16,32,0,0.02090719938278198
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,16,64,0,0.016940799355506898
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,16,2,0,0.1174496054649353
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,64,16,1,0,0.21822879314422608
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,16,4,0,0.07210239768028259
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,16,8,0,0.04734399914741516
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,16,16,0,0.033364799618721006
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,16,32,0,0.02720479965209961
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,16,64,0,0.023160000145435334
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,64,32,1,0,0.25664639472961426
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,32,2,0,0.13642239570617676
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,32,4,0,0.07424319982528686
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,32,8,0,0.04559679925441742
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,32,16,0,0.033129599690437314
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,32,32,0,0.022908799350261688
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,32,64,0,0.01883520036935806
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,64,32,1,0,0.28393919467926027
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,32,2,0,0.1540832042694092
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,32,4,0,0.08853600025177003
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,32,8,0,0.055796802043914795
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,32,16,0,0.03962559998035431
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,32,32,0,0.03128640055656433
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,32,64,0,0.02715040147304535
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,64,64,1,0,0.3633968114852905
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,64,2,0,0.19307039976119994
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,64,4,0,0.10900479555130005
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,64,8,0,0.060652798414230345
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,64,16,0,0.04148319959640503
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,64,32,0,0.0311024010181427
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,64,64,0,0.02311840057373047
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,64,64,1,0,0.43590397834777833
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,64,2,0,0.23452799320220946
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,64,4,0,0.13135839700698854
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,64,8,0,0.07640320062637329
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,64,16,0,0.053431999683380124
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,64,32,0,0.038980799913406375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,64,64,0,0.031272000074386595
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,128,2,0,0.29983360767364503
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,64,128,1,0,0.5715936183929443
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,128,4,0,0.16623519659042357
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,128,8,0,0.0964896023273468
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,128,16,0,0.05979679822921753
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,128,32,0,0.04152320027351379
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,128,64,0,0.03318400084972382
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,64,128,1,0,0.7141839981079101
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,128,4,0,0.21058719158172606
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,128,2,0,0.3789792060852051
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,128,8,0,0.12120800018310547
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,128,16,0,0.07620800137519837
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,128,64,0,0.043532800674438474
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,64,256,1,0,1.07532958984375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,128,32,0,0.05575039982795715
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,256,2,0,0.5578320026397705
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,256,4,0,0.3038640022277832
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,256,8,0,0.17225760221481323
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,256,16,0,0.10497599840164185
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,256,32,0,0.06796960234642029
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,256,64,0,0.051729601621627805
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,64,256,1,0,1.3140224456787108
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,256,2,0,0.6873472213745118
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,256,4,0,0.3737152099609375
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,256,8,0,0.21318719387054444
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,256,32,0,0.0904416024684906
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,256,16,0,0.132151997089386
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,256,64,0,0.0659824013710022
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,64,512,1,0,2.2318111419677735
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,512,4,0,0.6133440017700196
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,512,2,0,1.1603232383728028
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,512,8,0,0.3406703948974609
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,512,16,0,0.20423998832702636
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,512,32,0,0.133296000957489
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,512,64,0,0.09667999744415283
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,64,512,1,0,2.7275344848632814
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,512,2,0,1.4064767837524415
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,512,8,0,0.41726078987121584
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,512,4,0,0.7469632148742675
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,512,16,0,0.24973440170288086
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,512,32,0,0.1650607943534851
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,512,64,0,0.11948479413986206
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,64,1024,2,0,2.622831916809082
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,64,1024,4,0,1.3621567726135253
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,64,1024,8,0,0.7496319770812988
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,64,1024,1,0,5.099542236328125
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,64,1024,16,0,0.4314896106719971
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,64,1024,32,0,0.2738944053649902
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,64,1024,64,0,0.19416799545288085
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,64,1024,1,0,5.9088623046875
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,64,1024,4,0,1.5851455688476563
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,64,1024,8,0,0.8676783561706543
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,64,1024,2,0,3.0273935317993166
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,64,1024,16,0,0.5052320003509522
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,64,1024,32,0,0.3250864028930664
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,64,1024,64,0,0.23048000335693358
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,128,1,1,0,0.33038079738616943
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,1,2,0,0.15402239561080933
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,1,4,0,0.08433600068092346
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,1,8,0,0.04962239861488342
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,1,16,0,0.031231999397277832
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,1,32,0,0.023076799511909486
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,1,64,0,0.02072319984436035
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,128,1,1,0,0.2976560115814209
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,1,2,0,0.15827679634094238
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,1,4,0,0.09062880277633667
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,1,8,0,0.055820798873901366
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,1,16,0,0.037364798784255984
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,1,32,0,0.029707199335098265
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,1,64,0,0.025007998943328856
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,128,16,1,0,0.38355679512023927
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,16,2,0,0.2006608009338379
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,16,4,0,0.10499039888381959
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,16,8,0,0.06193119883537292
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,16,16,0,0.039297598600387576
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,16,32,0,0.0270687997341156
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,16,64,0,0.02096160054206848
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,128,16,1,0,0.404099178314209
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,16,2,0,0.21822080612182618
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,16,4,0,0.1177664041519165
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,16,8,0,0.07212160229682922
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,16,16,0,0.04735200107097626
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,16,32,0,0.03715200126171112
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,16,64,0,0.030633598566055298
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,128,32,1,0,0.49290242195129397
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,32,2,0,0.25685920715332033
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,32,4,0,0.13992320299148558
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,32,8,0,0.07850080132484435
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,32,16,0,0.04761120080947876
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,32,32,0,0.03531680107116699
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,32,64,0,0.025054401159286498
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,128,32,1,0,0.539851188659668
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,32,4,0,0.1570912003517151
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,32,2,0,0.2856528043746948
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,32,8,0,0.09224320054054261
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,32,16,0,0.05785120129585266
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,32,32,0,0.045371198654174806
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,32,64,0,0.03316799998283386
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,128,64,1,0,0.7078688144683838
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,64,2,0,0.3673151969909668
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,64,4,0,0.19710079431533814
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,64,8,0,0.11343679428100586
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,64,16,0,0.0661952018737793
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,64,32,0,0.04554400146007538
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,64,64,0,0.03513120114803314
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,128,64,1,0,0.8443872451782226
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,64,2,0,0.44369120597839357
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,64,4,0,0.24204161167144775
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,64,8,0,0.13763200044631957
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,64,16,0,0.0822048008441925
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,64,32,0,0.05956799983978271
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,64,64,0,0.04549759924411774
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,128,128,1,0,1.116312026977539
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,128,8,0,0.17550560235977172
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,128,2,0,0.580676794052124
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,128,4,0,0.3118367910385132
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,128,16,0,0.10761439800262451
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,128,32,0,0.0720192015171051
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,128,64,0,0.051744002103805545
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,128,128,1,0,1.4039392471313477
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,128,4,0,0.3923007965087891
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,128,8,0,0.22396159172058105
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,128,2,0,0.7285024166107178
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,128,16,0,0.13556159734725953
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,128,32,0,0.09201599955558777
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,128,64,0,0.06818879842758178
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,128,256,1,0,2.1277231216430663
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,256,2,0,1.0892895698547362
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,256,8,0,0.32457919120788575
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,256,4,0,0.5801424026489258
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,256,16,0,0.1930735945701599
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,256,32,0,0.12716799974441528
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,256,64,0,0.09047200083732605
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,128,256,1,0,2.5931615829467773
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,256,2,0,1.340665626525879
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,256,4,0,0.7130335807800293
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,256,8,0,0.3994208097457886
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,256,16,0,0.23989439010620117
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,256,32,0,0.15994720458984374
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,256,64,0,0.11722400188446044
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,128,512,4,0,1.1979071617126464
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,128,512,1,0,4.445137786865234
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,128,512,2,0,2.2811712265014648
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,128,512,8,0,0.6593167781829834
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,128,512,16,0,0.38405120372772217
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,128,512,32,0,0.24733760356903076
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,128,512,64,0,0.17668639421463012
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,128,512,1,0,5.436395263671875
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,128,512,8,0,0.8012127876281738
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,128,512,4,0,1.461836814880371
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,128,512,2,0,2.7812000274658204
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,128,512,16,0,0.4705056190490723
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,128,512,32,0,0.3051503896713257
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,128,512,64,0,0.2197200059890747
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,256,1,1,0,0.5891583919525146
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,1,2,0,0.2953408002853394
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,1,4,0,0.16174880266189576
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,1,8,0,0.0864463984966278
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,1,16,0,0.04971520006656647
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,1,32,0,0.03164960145950317
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,1,64,0,0.023014399409294128
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,256,1,1,0,0.5717504024505615
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,1,2,0,0.2976752042770386
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,1,4,0,0.1585584044456482
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,1,8,0,0.08850880265235901
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,1,16,0,0.05575680136680603
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,1,32,0,0.037412801384925844
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,1,64,0,0.031036800146102904
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,256,16,1,0,0.745201587677002
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,16,2,0,0.384115195274353
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,16,4,0,0.20328960418701172
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,16,8,0,0.1079408049583435
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,16,16,0,0.06543359756469727
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,16,32,0,0.041440001130104064
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,16,64,0,0.03147040009498596
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,256,16,1,0,0.7784304141998291
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,16,4,0,0.2195888042449951
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,16,2,0,0.40592160224914553
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,16,8,0,0.12334400415420532
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,16,16,0,0.07431679964065552
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,16,32,0,0.05159199833869934
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,16,64,0,0.03938080072402954
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,256,32,1,0,0.970911979675293
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,32,4,0,0.26257278919219973
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,32,2,0,0.4992544174194336
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,32,8,0,0.1444208025932312
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,32,16,0,0.08405600190162658
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,32,32,0,0.053598397970199586
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,32,64,0,0.03951199948787689
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,256,32,1,0,1.049732780456543
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,32,4,0,0.2931312084197998
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,32,2,0,0.5457168102264405
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,32,8,0,0.16416159868240357
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,32,16,0,0.09879680275917054
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,32,32,0,0.06430559754371643
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,32,64,0,0.05159519910812378
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,256,64,1,0,1.391153621673584
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,64,2,0,0.7150847911834717
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,64,4,0,0.37688961029052737
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,64,8,0,0.20955839157104492
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,64,16,0,0.12350879907608033
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,64,32,0,0.07838559746742249
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,64,64,0,0.057739198207855225
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,256,64,1,0,1.6621343612670898
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,64,2,0,0.8576175689697265
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,64,4,0,0.4571119785308838
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,64,8,0,0.25608320236206056
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,64,16,0,0.15209120512008667
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,64,32,0,0.09874399900436401
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,64,64,0,0.07462720274925232
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,256,128,1,0,2.2073312759399415
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,128,2,0,1.1409695625305176
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,128,4,0,0.6003712177276611
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,128,8,0,0.33049440383911133
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,128,16,0,0.19848159551620484
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,128,32,0,0.127510404586792
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,128,64,0,0.0924560010433197
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,256,128,1,0,2.7706607818603515
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,128,8,0,0.4168896198272705
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,128,4,0,0.7576288223266602
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,128,2,0,1.4271696090698243
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,128,16,0,0.2487936019897461
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,128,32,0,0.16349439620971679
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,128,64,0,0.11921600103378296
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,128,256,256,1,0,4.224710464477539
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,16,256,256,8,0,0.6250319957733155
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,32,256,256,4,0,1.134615993499756
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,64,256,256,2,0,2.1510080337524413
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,8,256,256,16,0,0.36745920181274416
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,4,256,256,32,0,0.23556320667266845
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,float16,2,256,256,64,0,0.16963839530944824
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,128,256,256,1,0,5.159483337402344
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,64,256,256,2,0,2.6490224838256835
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,16,256,256,8,0,0.7642335891723633
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,32,256,256,4,0,1.3974575996398926
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,8,256,256,16,0,0.4525519847869873
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,4,256,256,32,0,0.29337759017944337
SGLang,0.5.9,NVIDIA GB300,mla_context,trtllm_mla,float16,fp8,2,256,256,64,0,0.21498239040374756
