framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1,1,0,0.01459999978542328
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1,2,0,0.012895999848842621
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1,4,0,0.014535999298095703
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1,8,0,0.014567999541759491
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1,16,0,0.012486399710178375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1,32,0,0.01374399960041046
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,1,64,0,0.014166399836540222
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1,1,0,0.020791999995708466
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1,4,0,0.020764799416065217
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1,8,0,0.02099359929561615
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1,16,0,0.021113599836826324
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1,32,0,0.020948800444602966
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1,2,0,0.020550400018692017
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,1,64,0,0.020880000293254854
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,16,1,0,0.016806399822235106
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,16,2,0,0.016814400255680085
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,16,8,0,0.014987200498580933
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,16,16,0,0.014710399508476257
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,16,32,0,0.01494240015745163
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,16,4,0,0.014918400347232819
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,16,64,0,0.01478240042924881
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,16,1,0,0.022976000607013703
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,16,2,0,0.022972799837589264
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,16,8,0,0.023025600612163542
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,16,4,0,0.02131199985742569
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,16,16,0,0.023131200671195985
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,16,32,0,0.023052799701690673
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,16,64,0,0.022939200699329376
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,32,2,0,0.015355199575424194
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,32,4,0,0.01658719927072525
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,32,1,0,0.016964800655841827
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,32,8,0,0.014886400103569031
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,32,16,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,32,32,0,0.01483200043439865
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,32,1,0,0.023043200373649597
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,32,64,0,0.01674720048904419
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,32,2,0,0.023076799511909486
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,32,4,0,0.023148800432682037
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,32,8,0,0.02295520007610321
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,32,32,0,0.02263360023498535
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,32,64,0,0.022971199452877046
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,32,16,0,0.02287999987602234
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,64,1,0,0.016953599452972413
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,64,2,0,0.016857600212097167
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,64,4,0,0.016862399876117706
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,64,16,0,0.016771200299263
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,64,8,0,0.01509920060634613
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,64,32,0,0.014839999377727509
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,64,64,0,0.016883200407028197
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,64,1,0,0.025110399723052977
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,64,4,0,0.02282080054283142
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,64,8,0,0.023080000281333925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,64,2,0,0.025364801287651062
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,64,16,0,0.02284799963235855
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,64,32,0,0.021035200357437132
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,64,64,0,0.022993600368499754
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,128,2,0,0.01886080056428909
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,128,1,0,0.019041599333286287
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,128,4,0,0.01687999963760376
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,128,8,0,0.017046399414539337
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,128,16,0,0.016868799924850464
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,128,64,0,0.01679680049419403
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,128,1,0,0.029051199555397034
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,128,32,0,0.014839999377727509
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,128,2,0,0.025113600492477416
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,128,4,0,0.02502560019493103
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,128,8,0,0.025095999240875244
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,128,16,0,0.023080000281333925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,128,32,0,0.0242576003074646
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,128,64,0,0.023030400276184082
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,256,1,0,0.026076799631118773
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,256,2,0,0.02303680032491684
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,256,4,0,0.020926399528980254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,256,8,0,0.019012799859046935
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,256,16,0,0.018963199853897095
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,256,64,0,0.01889439970254898
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,256,1,0,0.03525600135326386
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,256,32,0,0.018863999843597413
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,256,2,0,0.03017599880695343
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,256,4,0,0.029057601094245912
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,256,8,0,0.02908799946308136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,256,32,0,0.027187201380729675
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,256,16,0,0.0271807998418808
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,256,64,0,0.027127999067306518
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,512,1,0,0.03450720012187958
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,512,2,0,0.029190400242805482
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,512,4,0,0.027166399359703063
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,512,8,0,0.023048000037670137
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,512,16,0,0.023160000145435334
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,512,32,0,0.022937600314617158
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,512,1,0,0.045710399746894836
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,512,64,0,0.023057599365711213
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,512,2,0,0.03749760091304779
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,512,4,0,0.03328000009059906
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,512,8,0,0.031190401315689086
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,512,32,0,0.029497599601745604
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,512,64,0,0.029238399863243104
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,512,16,0,0.029297599196434022
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1024,1,0,0.06761279702186584
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1024,2,0,0.041387200355529785
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1024,4,0,0.037283200025558474
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1024,8,0,0.033215999603271484
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1024,16,0,0.03126559853553772
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1024,32,0,0.03123520016670227
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,1024,64,0,0.03040800094604492
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1024,1,0,0.08018720149993896
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1024,2,0,0.05198240280151367
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1024,4,0,0.04363360106945038
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1024,16,0,0.03740319907665253
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1024,32,0,0.03513759970664978
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,1024,64,0,0.03722879886627197
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1024,8,0,0.03942080140113831
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1536,1,0,0.10500320196151733
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1536,2,0,0.065556800365448
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1536,4,0,0.04559679925441742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1536,8,0,0.04163840115070343
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1536,16,0,0.03806560039520264
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,1536,64,0,0.037136000394821164
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1536,32,0,0.03714239895343781
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1536,1,0,0.12010079622268677
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1536,2,0,0.07605119943618774
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1536,4,0,0.05658559799194336
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1536,8,0,0.047624000906944276
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1536,32,0,0.043507200479507444
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1536,16,0,0.04557600021362305
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,1536,64,0,0.04349279999732971
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,2048,1,0,0.14846080541610718
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,2048,2,0,0.09039679765701295
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,2048,8,0,0.05095040202140808
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,2048,4,0,0.05772960186004639
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,2048,16,0,0.04555839896202087
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,2048,32,0,0.04551199972629547
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,2048,64,0,0.045256000757217404
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,2048,1,0,0.16413919925689696
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,2048,2,0,0.10021599531173705
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,2048,4,0,0.06399040222167969
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,2048,8,0,0.055934399366378784
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,2048,32,0,0.04970560073852539
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,2048,64,0,0.04967359900474548
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,2048,16,0,0.051755201816558835
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,3072,1,0,0.25734720230102537
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,3072,2,0,0.1460271954536438
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,3072,4,0,0.09374079704284669
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,3072,8,0,0.06830719709396363
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,3072,16,0,0.06320480108261109
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,3072,32,0,0.05856639742851257
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,3072,64,0,0.057923197746276855
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,3072,1,0,0.26700639724731445
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,3072,4,0,0.10003679990768433
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,3072,2,0,0.15762399435043334
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,3072,8,0,0.07442560195922851
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,3072,32,0,0.06394879817962647
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,3072,64,0,0.06477919816970826
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,4096,1,0,0.3837696075439453
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,3072,16,0,0.06819679737091064
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,4096,2,0,0.21946558952331544
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,4096,8,0,0.08452799916267395
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,4096,16,0,0.0783407986164093
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,4096,4,0,0.1375599980354309
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,4096,32,0,0.07421759963035583
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,4096,64,0,0.07192320227622986
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,4096,1,0,0.3890176057815552
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,4096,2,0,0.22347838878631593
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,4096,4,0,0.13858720064163207
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,4096,8,0,0.09234240055084228
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,4096,32,0,0.07838720083236694
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,4096,64,0,0.07629280090332032
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,6144,1,0,0.7091567993164063
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,4096,16,0,0.08257439732551575
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,6144,2,0,0.3993168115615845
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,6144,8,0,0.14632320404052734
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,6144,16,0,0.10996320247650146
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,6144,4,0,0.2319200038909912
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,6144,32,0,0.10310239791870117
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,6144,64,0,0.09892799854278564
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,6144,2,0,0.38039360046386717
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,6144,1,0,0.6876399993896485
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,6144,4,0,0.2285248041152954
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,6144,8,0,0.14625920057296754
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,6144,16,0,0.11308640241622925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,6144,64,0,0.10291999578475952
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,6144,32,0,0.10501279830932617
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,8192,1,0,1.1229424476623535
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,8192,2,0,0.6114240169525147
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,8192,4,0,0.36016321182250977
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,8192,16,0,0.14409279823303223
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,8192,8,0,0.22634561061859132
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,8192,32,0,0.1336351990699768
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,8192,64,0,0.12972960472106934
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,8192,1,0,1.0407471656799316
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,8192,4,0,0.3363183975219727
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,8192,2,0,0.5810863971710205
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,8192,8,0,0.21736481189727783
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,8192,16,0,0.14253599643707277
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,8192,32,0,0.13356640338897705
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,8192,64,0,0.1276047945022583
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,10240,1,0,1.8081167221069336
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,10240,2,0,0.8780960083007813
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,10240,4,0,0.4962399959564209
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,10240,16,0,0.19262239933013917
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,10240,32,0,0.16233439445495607
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,10240,8,0,0.30970718860626223
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,10240,64,0,0.1581231951713562
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,10240,1,0,1.472060775756836
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,10240,4,0,0.46025757789611815
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,10240,2,0,0.815071964263916
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,10240,8,0,0.29087679386138915
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,10240,16,0,0.18167200088500976
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,10240,32,0,0.16091840267181395
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,10240,64,0,0.15432159900665282
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,12288,1,0,2.478620719909668
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,12288,2,0,1.1903023719787598
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,12288,4,0,0.6636943817138672
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,12288,8,0,0.40221757888793946
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,12288,16,0,0.2565295934677124
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,12288,32,0,0.19300479888916017
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,12288,64,0,0.18670079708099366
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,12288,2,0,1.079150390625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,12288,8,0,0.3630624055862427
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,12288,4,0,0.6026991844177246
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,12288,1,0,1.9920528411865235
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,12288,32,0,0.18971840143203736
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,12288,16,0,0.2407855987548828
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,12288,64,0,0.18068959712982177
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,16384,1,0,4.380033493041992
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,16384,4,0,1.0788703918457032
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,16384,2,0,1.9238048553466798
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,16384,8,0,0.6207759857177735
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,16384,32,0,0.26255838871002196
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,16384,64,0,0.2442336082458496
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,16384,16,0,0.39808800220489504
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,16384,1,0,3.4045440673828127
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,16384,2,0,1.7137823104858398
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,16384,4,0,0.9383855819702148
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,16384,8,0,0.5597311973571777
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,16384,16,0,0.3589247941970825
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,16384,64,0,0.23416800498962403
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,16384,32,0,0.24518721103668212
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,32768,8,0,1.9679216384887694
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,32768,4,0,4.427601623535156
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,32768,2,0,10.883419036865234
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,32768,16,0,1.1419952392578125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,32768,32,0,0.7123248100280761
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,32768,64,0,0.4887392044067383
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,32768,1,0,22.790257263183594
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,32768,4,0,3.051278305053711
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,32768,2,0,6.628934478759765
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,32768,8,0,1.6706655502319336
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,32768,1,0,13.329977416992188
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,32768,16,0,0.9753647804260254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,32768,32,0,0.6315087795257568
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,32768,64,0,0.4449728012084961
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1,1,0,0.016836799681186676
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1,4,0,0.01483519971370697
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1,2,0,0.014798399806022645
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1,16,0,0.016276800632476808
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1,8,0,0.01685439944267273
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1,32,0,0.014721600711345673
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,1,64,0,0.014843200147151948
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1,1,0,0.022908799350261688
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1,4,0,0.02285120040178299
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1,2,0,0.023897600173950196
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1,8,0,0.02295839935541153
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1,16,0,0.02282560020685196
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1,32,0,0.021724799275398256
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,1,64,0,0.0211776003241539
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,16,1,0,0.01683039963245392
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,16,2,0,0.016982400417327882
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,16,4,0,0.014679999649524688
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,16,16,0,0.015014399588108063
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,16,8,0,0.014907200634479523
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,16,32,0,0.014912000298500061
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,16,64,0,0.014588800072669984
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,16,1,0,0.024916799366474153
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,16,4,0,0.023099200427532197
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,16,8,0,0.022841599583625794
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,16,2,0,0.025007998943328856
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,16,16,0,0.023014399409294128
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,16,32,0,0.022907200455665588
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,16,64,0,0.022920000553131103
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,32,1,0,0.018544000387191773
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,32,2,0,0.016868799924850464
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,32,8,0,0.014718399941921234
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,32,4,0,0.014819200336933135
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,32,32,0,0.015056000649929046
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,32,16,0,0.014924800395965577
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,32,64,0,0.014815999567508698
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,32,1,0,0.02701919972896576
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,32,2,0,0.022995199263095855
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,32,4,0,0.02340639978647232
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,32,8,0,0.022945599257946016
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,32,16,0,0.023158399760723113
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,32,32,0,0.022963200509548188
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,64,1,0,0.01897439956665039
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,32,64,0,0.021206399798393248
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,64,2,0,0.01685599982738495
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,64,4,0,0.01693439930677414
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,64,8,0,0.016832000017166136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,64,32,0,0.014849600195884705
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,64,16,0,0.016803200542926788
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,64,64,0,0.014715200662612915
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,64,1,0,0.02906239926815033
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,64,2,0,0.025945600867271424
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,64,4,0,0.025097599625587462
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,64,8,0,0.02295520007610321
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,64,16,0,0.023153600096702576
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,64,32,0,0.02292319983243942
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,128,1,0,0.02292319983243942
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,64,64,0,0.025003200769424437
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,128,2,0,0.020873600244522096
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,128,4,0,0.017020800709724428
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,128,8,0,0.016918399930000307
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,128,16,0,0.016836799681186676
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,128,32,0,0.016857600212097167
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,128,64,0,0.01672479957342148
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,128,1,0,0.033283200860023496
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,128,4,0,0.02521440088748932
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,128,2,0,0.027239999175071715
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,128,8,0,0.025073599815368653
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,128,16,0,0.025203201174736022
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,128,32,0,0.02507199943065643
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,256,2,0,0.025113600492477416
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,256,1,0,0.031112000346183777
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,128,64,0,0.024991999566555022
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,256,4,0,0.023108799755573273
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,256,8,0,0.020897600054740905
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,256,16,0,0.020751999318599702
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,256,32,0,0.018955199420452117
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,256,64,0,0.019049599766731262
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,256,2,0,0.03511680066585541
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,256,1,0,0.043412798643112184
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,256,8,0,0.029128000140190125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,256,4,0,0.03126719892024994
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,256,16,0,0.02720000147819519
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,256,32,0,0.02707040011882782
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,256,64,0,0.027081599831581114
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,512,1,0,0.056536000967025754
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,512,2,0,0.03539359867572785
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,512,4,0,0.030660799145698546
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,512,8,0,0.025169599056243896
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,512,32,0,0.023124800622463228
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,512,16,0,0.024916799366474153
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,512,64,0,0.02309119999408722
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,512,1,0,0.0721679985523224
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,512,2,0,0.04686079919338226
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,512,8,0,0.03317599892616272
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,512,16,0,0.03313600122928619
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,512,4,0,0.03896799981594086
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,512,32,0,0.03130559921264649
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,512,64,0,0.031215998530387878
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1024,1,0,0.11720479726791382
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1024,2,0,0.07034080028533936
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1024,4,0,0.04351679980754852
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1024,8,0,0.03732799887657166
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1024,16,0,0.03340800106525421
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,1024,64,0,0.03130399882793426
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1024,32,0,0.033180800080299375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1024,1,0,0.13430720567703247
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1024,2,0,0.08224959969520569
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1024,4,0,0.05283679962158203
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1024,8,0,0.045521599054336545
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1024,32,0,0.039401599764823915
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1024,16,0,0.04150559902191162
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,1024,64,0,0.03743360042572021
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1536,2,0,0.10823999643325806
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1536,1,0,0.18752959966659546
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1536,4,0,0.0672544002532959
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1536,8,0,0.04784319996833801
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1536,16,0,0.043568000197410583
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,1536,64,0,0.03937279880046844
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1536,1,0,0.20904479026794434
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1536,32,0,0.03965280055999756
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1536,2,0,0.12317119836807251
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1536,4,0,0.07831199765205384
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1536,8,0,0.05587520003318787
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1536,16,0,0.04984000027179718
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1536,32,0,0.047625601291656494
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,1536,64,0,0.045491200685501096
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,2048,1,0,0.2688783884048462
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,2048,4,0,0.09283840060234069
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,2048,2,0,0.15438560247421265
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,2048,8,0,0.059736001491546634
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,2048,16,0,0.0517632007598877
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,2048,32,0,0.04982239902019501
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,2048,1,0,0.2921135902404785
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,2048,64,0,0.04766719937324524
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,2048,2,0,0.1667296051979065
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,2048,4,0,0.10290399789810181
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,2048,8,0,0.06614239811897278
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,2048,32,0,0.0557744026184082
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,2048,16,0,0.06007199883460999
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,2048,64,0,0.055745601654052734
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,3072,2,0,0.25955040454864503
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,3072,1,0,0.46554718017578123
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,3072,4,0,0.15423519611358644
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,3072,8,0,0.09769279956817627
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,3072,16,0,0.07033600211143494
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,3072,32,0,0.06592959761619568
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,3072,1,0,0.48496479988098146
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,3072,64,0,0.0639840006828308
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,3072,4,0,0.1608399987220764
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,3072,2,0,0.27284960746765136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,3072,8,0,0.10506720542907715
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,3072,16,0,0.07860640287399293
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,3072,32,0,0.07235360145568848
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,3072,64,0,0.07012799978256226
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,4096,1,0,0.7089136123657227
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,4096,2,0,0.39404959678649903
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,4096,4,0,0.22683041095733641
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,4096,8,0,0.1428447961807251
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,4096,16,0,0.09070079922676086
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,4096,64,0,0.07844799757003784
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,4096,1,0,0.7154319763183594
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,4096,32,0,0.08320479989051818
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,4096,2,0,0.3921839952468872
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,4096,4,0,0.22790079116821288
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,4096,8,0,0.14548319578170776
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,4096,16,0,0.09895359873771667
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,4096,32,0,0.09063360095024109
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,4096,64,0,0.08441280126571656
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,6144,1,0,1.3549887657165527
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,6144,4,0,0.40177121162414553
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,6144,2,0,0.7293856143951416
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,6144,8,0,0.24081120491027833
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,6144,16,0,0.1606528043746948
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,6144,32,0,0.11768959760665894
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,6144,1,0,1.2683247566223144
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,6144,64,0,0.11127680540084839
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,6144,2,0,0.6959871768951416
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,6144,4,0,0.3912607908248901
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,6144,8,0,0.23577280044555665
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,6144,32,0,0.12350399494171142
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,6144,16,0,0.15828319787979125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,6144,64,0,0.11582560539245605
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,8192,1,0,2.1335983276367188
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,8192,2,0,1.1527215957641601
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,8192,8,0,0.3669919967651367
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,8192,4,0,0.6397136211395263
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,8192,16,0,0.23648478984832763
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,8192,32,0,0.15731040239334107
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,8192,64,0,0.14599679708480834
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,8192,1,0,1.9880624771118165
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,8192,2,0,1.054860782623291
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,8192,4,0,0.5969024181365967
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,8192,8,0,0.3495743989944458
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,8192,32,0,0.15830559730529786
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,8192,64,0,0.14817119836807252
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,8192,16,0,0.23214240074157716
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,10240,1,0,3.83768310546875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,10240,4,0,0.9054335594177246
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,10240,2,0,1.6380895614624023
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,10240,16,0,0.32156479358673096
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,10240,8,0,0.5206719875335694
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,10240,32,0,0.207694411277771
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,10240,64,0,0.17762240171432495
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,10240,1,0,2.8149824142456055
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,10240,2,0,1.5060943603515624
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,10240,4,0,0.8292559623718262
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,10240,8,0,0.47807679176330564
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,10240,16,0,0.30671679973602295
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,10240,64,0,0.17979040145874023
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,10240,32,0,0.20418400764465333
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,12288,1,0,6.343732833862305
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,12288,2,0,2.332080078125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,12288,4,0,1.2242671966552734
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,12288,8,0,0.6935088157653808
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,12288,16,0,0.4171567916870117
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,12288,32,0,0.2708192110061646
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,12288,64,0,0.21345279216766358
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,12288,2,0,2.0024991989135743
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,12288,4,0,1.0862223625183105
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,12288,8,0,0.6325808048248291
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,12288,1,0,3.948332977294922
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,12288,32,0,0.2614255905151367
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,12288,16,0,0.38738720417022704
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,12288,64,0,0.20991039276123047
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,16384,8,0,1.1018912315368652
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,16384,4,0,2.1248735427856444
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,16384,2,0,4.965697479248047
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,16384,1,0,11.737427520751954
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,16384,32,0,0.4123695850372314
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,16384,16,0,0.6491712093353271
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,16384,64,0,0.28438239097595214
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,16384,4,0,1.7445600509643555
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,16384,8,0,0.9676239967346192
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,16384,2,0,3.553871917724609
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,16384,1,0,7.577454376220703
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,16384,16,0,0.5891503810882568
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,16384,32,0,0.38949439525604246
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,16384,64,0,0.2747663974761963
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,32768,8,0,4.569252777099609
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,32768,4,0,10.695196533203125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,32768,16,0,1.9826976776123046
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,32768,32,0,1.1663984298706054
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,32768,2,0,21.435453796386717
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,32768,64,0,0.7592607975006104
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,32768,4,0,6.973000335693359
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,32768,2,0,13.588548278808593
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,32768,1,0,49.461669921875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,32768,8,0,3.2744831085205077
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,32768,16,0,1.720871925354004
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,32768,1,0,27.924334716796874
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,32768,32,0,1.0429583549499513
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,32768,64,0,0.6978975772857666
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1,1,0,0.022703999280929567
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1,2,0,0.016897599399089813
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1,4,0,0.016596800088882445
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1,8,0,0.0149167999625206
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1,16,0,0.014705599844455719
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1,32,0,0.014815999567508698
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,1,64,0,0.01491519957780838
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1,2,0,0.023068800568580627
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1,4,0,0.02295839935541153
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1,1,0,0.02712480127811432
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1,16,0,0.02298559993505478
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1,32,0,0.022945599257946016
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1,8,0,0.02312159985303879
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,1,64,0,0.023188799619674683
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,16,1,0,0.01927199959754944
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,16,2,0,0.01685599982738495
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,16,8,0,0.014871999621391296
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,16,4,0,0.014951999485492706
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,16,16,0,0.014926399290561675
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,16,32,0,0.016787199676036833
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,16,64,0,0.01677280068397522
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,16,1,0,0.029209598898887634
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,16,2,0,0.025073599815368653
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,16,4,0,0.02496960014104843
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,16,8,0,0.023131200671195985
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,16,32,0,0.02304159998893738
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,16,16,0,0.02290399968624115
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,16,64,0,0.021009600162506102
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,32,1,0,0.02101600021123886
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,32,2,0,0.016811199486255646
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,32,16,0,0.014880000054836274
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,32,4,0,0.016952000558376312
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,32,8,0,0.014830400049686433
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,32,32,0,0.01563040018081665
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,32,64,0,0.01639360040426254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,32,2,0,0.026980799436569215
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,32,1,0,0.03102560043334961
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,32,4,0,0.025019198656082153
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,32,8,0,0.02507199943065643
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,32,16,0,0.022916799783706664
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,32,32,0,0.022886399924755097
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,32,64,0,0.023080000281333925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,64,1,0,0.025147199630737305
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,64,2,0,0.0190528005361557
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,64,8,0,0.016809600591659545
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,64,16,0,0.016924799978733064
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,64,4,0,0.01698720008134842
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,64,32,0,0.014839999377727509
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,64,64,0,0.014777599275112152
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,64,1,0,0.03536800146102905
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,64,4,0,0.025169599056243896
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,64,2,0,0.029022398591041564
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,64,8,0,0.02306399941444397
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,64,16,0,0.02322079986333847
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,64,32,0,0.023014399409294128
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,64,64,0,0.02313919961452484
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,128,1,0,0.033134400844573975
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,128,2,0,0.025006398558616638
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,128,4,0,0.01897439956665039
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,128,16,0,0.016884799301624297
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,128,32,0,0.016766400635242464
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,128,8,0,0.019196799397468566
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,128,64,0,0.01695999950170517
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,128,1,0,0.045449599623680115
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,128,2,0,0.03333759903907776
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,128,8,0,0.027059200406074523
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,128,4,0,0.02717280089855194
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,128,16,0,0.025171199440956117
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,128,32,0,0.024977600574493407
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,128,64,0,0.024935999512672426
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,256,1,0,0.0497296005487442
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,256,2,0,0.03135679960250855
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,256,4,0,0.02698560059070587
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,256,8,0,0.022939200699329376
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,256,16,0,0.02287999987602234
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,256,32,0,0.020870399475097657
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,256,64,0,0.0208639994263649
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,256,1,0,0.06404320001602173
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,256,2,0,0.043609601259231565
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,256,4,0,0.0352048009634018
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,256,8,0,0.03113600015640259
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,256,16,0,0.029228800535202028
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,256,32,0,0.027251198887825012
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,256,64,0,0.027083200216293336
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,512,1,0,0.09714080095291137
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,512,2,0,0.058078402280807497
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,512,4,0,0.03735840022563934
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,512,8,0,0.031327998638153075
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,512,16,0,0.029067200422286988
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,512,32,0,0.02520799934864044
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,512,64,0,0.02526240050792694
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,512,1,0,0.1194208025932312
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,512,2,0,0.0743120014667511
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,512,4,0,0.04758400022983551
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,512,8,0,0.03942880034446716
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,512,16,0,0.03522560000419617
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,512,32,0,0.03332639932632446
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,512,64,0,0.03325439989566803
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1024,1,0,0.21056640148162842
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1024,2,0,0.11937919855117798
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1024,4,0,0.07028639912605286
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1024,8,0,0.04699200093746185
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1024,16,0,0.0393887996673584
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1024,32,0,0.037329599261283875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,1024,64,0,0.03534559905529022
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1024,1,0,0.24412479400634765
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1024,2,0,0.1379472017288208
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1024,4,0,0.08612480163574218
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1024,8,0,0.058052802085876466
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1024,16,0,0.049691200256347656
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1024,32,0,0.045433598756790164
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,1024,64,0,0.043566399812698366
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1536,1,0,0.3410207986831665
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1536,2,0,0.19103679656982422
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1536,4,0,0.1116976022720337
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1536,8,0,0.0718720018863678
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1536,16,0,0.05162400007247925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1536,32,0,0.047681599855422974
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,1536,64,0,0.04367679953575134
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1536,1,0,0.38206241130828855
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1536,2,0,0.2142848014831543
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1536,4,0,0.12931519746780396
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1536,8,0,0.08238080143928528
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1536,16,0,0.06393120288848878
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1536,32,0,0.055959999561309814
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,1536,64,0,0.053827202320098876
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,2048,1,0,0.4994959831237793
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,2048,2,0,0.27576000690460206
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,2048,4,0,0.15741440057754516
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,2048,8,0,0.10029759407043456
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,2048,16,0,0.06406239867210388
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,2048,32,0,0.058107197284698486
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,2048,64,0,0.055851197242736815
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,2048,1,0,0.5422671794891357
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,2048,2,0,0.2978431940078735
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,2048,4,0,0.17423679828643798
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,2048,8,0,0.10915199518203736
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,2048,32,0,0.06613280177116394
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,2048,64,0,0.0618511974811554
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,2048,16,0,0.07428479790687562
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,3072,1,0,0.8832032203674316
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,3072,4,0,0.2704528093338013
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,3072,2,0,0.47255358695983884
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,3072,8,0,0.1632240056991577
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,3072,16,0,0.1070255994796753
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,3072,32,0,0.0807695984840393
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,3072,64,0,0.07416160106658935
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,3072,1,0,0.914891242980957
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,3072,2,0,0.4944479942321777
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,3072,4,0,0.283350396156311
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,3072,8,0,0.1723456025123596
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,3072,16,0,0.11523679494857789
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,3072,32,0,0.08856800198554993
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,3072,64,0,0.08245599865913392
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,4096,1,0,1.3438624382019042
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,4096,2,0,0.7343920230865478
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,4096,4,0,0.40127201080322267
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,4096,8,0,0.2394495964050293
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,4096,16,0,0.15217599868774415
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,4096,32,0,0.10309120416641235
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,4096,64,0,0.0946672022342682
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,4096,1,0,1.3541215896606444
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,4096,2,0,0.722603178024292
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,4096,8,0,0.2430272102355957
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,4096,4,0,0.41098880767822266
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,4096,16,0,0.16386879682540895
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,4096,32,0,0.11227999925613404
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,4096,64,0,0.10485440492630005
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,6144,1,0,2.646771240234375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,6144,2,0,1.3702272415161132
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,6144,4,0,0.7328320026397706
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,6144,8,0,0.421292781829834
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,6144,16,0,0.2620703935623169
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,6144,32,0,0.1753551959991455
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,6144,64,0,0.1359488010406494
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,6144,1,0,2.4625648498535155
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,6144,2,0,1.3047167778015136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,6144,4,0,0.7174911975860596
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,6144,8,0,0.41307997703552246
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,6144,16,0,0.25887839794158934
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,6144,32,0,0.18225280046463013
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,6144,64,0,0.14603519439697266
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,8192,1,0,5.293479919433594
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,8192,2,0,2.2067840576171873
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,8192,8,0,0.659339189529419
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,8192,4,0,1.1856703758239746
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,8192,64,0,0.18077119588851928
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,8192,32,0,0.26040799617767335
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,8192,16,0,0.39027841091156007
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,8192,1,0,3.9580448150634764
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,8192,8,0,0.6197455883026123
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,8192,4,0,1.0860608100891114
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,8192,2,0,2.01715030670166
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,8192,16,0,0.3826479911804199
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,8192,32,0,0.25825440883636475
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,8192,64,0,0.18542720079421998
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,10240,2,0,3.8603649139404297
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,10240,1,0,8.938632202148437
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,10240,4,0,1.6571327209472657
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,10240,16,0,0.5420127868652344
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,10240,8,0,0.9391440391540528
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,10240,32,0,0.35339200496673584
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,10240,64,0,0.23115200996398927
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,10240,8,0,0.8567472457885742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,10240,4,0,1.546609592437744
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,10240,2,0,2.925718307495117
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,10240,1,0,6.226147079467774
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,10240,16,0,0.5172431945800782
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,10240,32,0,0.3447216033935547
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,10240,64,0,0.23895840644836425
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,12288,8,0,1.2758511543273925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,12288,4,0,2.566702461242676
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,12288,2,0,6.121708679199219
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,12288,1,0,13.77408447265625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,12288,16,0,0.7310656070709228
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,12288,32,0,0.4569056034088135
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,12288,64,0,0.30290238857269286
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,12288,4,0,2.0649919509887695
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,12288,8,0,1.127139186859131
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,12288,2,0,3.9667022705078123
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,12288,1,0,8.765213012695312
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,12288,16,0,0.6748127937316895
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,12288,32,0,0.42727041244506836
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,12288,64,0,0.30611839294433596
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,16384,8,0,2.0738447189331053
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,16384,4,0,5.220580673217773
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,16384,2,0,10.983894348144531
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,16384,16,0,1.1527775764465331
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,16384,32,0,0.6881167888641357
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,16384,1,0,24.154971313476562
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,16384,64,0,0.4649055957794189
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,16384,4,0,3.7327041625976562
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,16384,8,0,1.7973567962646484
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,16384,2,0,7.1509757995605465
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,16384,1,0,15.506883239746093
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,16384,32,0,0.6476975917816162
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,16384,16,0,1.0281984329223632
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,16384,64,0,0.4448575973510742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1,1,0,0.029187199473381043
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1,2,0,0.02102400064468384
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1,4,0,0.016872000694274903
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1,8,0,0.016784000396728515
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1,16,0,0.014902399480342865
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1,32,0,0.014878399670124054
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,1,64,0,0.014711999893188476
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1,1,0,0.03317599892616272
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1,2,0,0.02834399938583374
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1,4,0,0.024940800666809083
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1,8,0,0.023025600612163542
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1,16,0,0.02295999974012375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,1,64,0,0.022870400547981264
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1,32,0,0.022878399491310118
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,16,2,0,0.02088800072669983
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,16,1,0,0.027115198969841003
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,16,4,0,0.016841599345207216
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,16,8,0,0.01698720008134842
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,16,16,0,0.014758400619029999
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,16,32,0,0.016715200245380403
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,16,64,0,0.016940799355506898
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,16,1,0,0.037206399440765384
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,16,2,0,0.028563201427459717
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,16,4,0,0.024985599517822265
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,16,8,0,0.02499839961528778
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,16,16,0,0.023022399842739107
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,16,32,0,0.022998400032520294
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,16,64,0,0.02299039959907532
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,32,1,0,0.03113279938697815
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,32,2,0,0.022915199398994446
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,32,4,0,0.016817599534988403
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,32,8,0,0.015043200552463531
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,32,16,0,0.01488959938287735
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,32,32,0,0.01674239933490753
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,32,64,0,0.014744000136852264
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,32,1,0,0.03954879939556122
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,32,2,0,0.031190401315689086
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,32,4,0,0.025116801261901855
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,32,8,0,0.02351360023021698
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,32,16,0,0.025080001354217528
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,32,32,0,0.023153600096702576
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,32,64,0,0.024883200228214265
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,64,1,0,0.037529599666595456
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,64,2,0,0.025086399912834168
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,64,4,0,0.020878399908542632
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,64,8,0,0.016832000017166136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,64,16,0,0.016811199486255646
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,64,32,0,0.014894400537014008
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,64,64,0,0.016784000396728515
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,64,1,0,0.04819200038909912
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,64,2,0,0.037215998768806456
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,64,4,0,0.0272816002368927
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,64,8,0,0.025260800123214723
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,64,16,0,0.02512640058994293
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,64,32,0,0.022815999388694764
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,64,64,0,0.023035199940204622
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,128,1,0,0.051841598749160764
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,128,2,0,0.03336159884929657
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,128,4,0,0.024910399317741395
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,128,8,0,0.020857599377632142
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,128,16,0,0.01884640008211136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,128,32,0,0.016868799924850464
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,128,64,0,0.017071999609470367
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,128,1,0,0.06984959840774536
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,128,2,0,0.04543519914150238
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,128,4,0,0.033236798644065854
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,128,8,0,0.02900480031967163
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,128,16,0,0.02696479856967926
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,128,32,0,0.025180798768997193
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,128,64,0,0.02503199875354767
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,256,1,0,0.09076160192489624
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,256,2,0,0.051648002862930295
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,256,4,0,0.03327359855175018
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,256,8,0,0.027025601267814635
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,256,16,0,0.02324160039424896
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,256,32,0,0.02308479994535446
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,256,64,0,0.022844800353050233
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,256,1,0,0.11487040519714356
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,256,2,0,0.06591200232505798
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,256,4,0,0.045535999536514285
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,256,8,0,0.039208000898361205
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,256,16,0,0.03317919969558716
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,256,32,0,0.03127520084381104
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,256,64,0,0.029083201289176942
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,512,1,0,0.18006240129470824
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,512,2,0,0.10005919933319092
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,512,4,0,0.05983039736747742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,512,8,0,0.03942880034446716
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,512,16,0,0.03324480056762695
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,512,32,0,0.029281601309776306
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,512,64,0,0.029238399863243104
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,512,1,0,0.2195375919342041
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,512,2,0,0.12488479614257812
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,512,4,0,0.07624959945678711
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,512,8,0,0.0517408013343811
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,512,16,0,0.04356319904327392
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,512,32,0,0.03922879993915558
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,512,64,0,0.0372655987739563
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1024,1,0,0.3901535987854004
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1024,2,0,0.21578559875488282
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1024,4,0,0.125873601436615
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1024,8,0,0.07629759907722473
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1024,16,0,0.051095998287200926
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1024,32,0,0.045561599731445315
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,1024,64,0,0.043331199884414674
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1024,1,0,0.4493247985839844
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1024,2,0,0.2499311923980713
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1024,4,0,0.14516160488128663
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1024,8,0,0.09091039896011352
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1024,16,0,0.06387360095977783
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1024,32,0,0.055771201848983765
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,1024,64,0,0.05186079740524292
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1536,1,0,0.6569615840911865
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1536,2,0,0.3500272035598755
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1536,4,0,0.1989743947982788
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1536,8,0,0.1211359977722168
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1536,16,0,0.08040000200271606
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1536,32,0,0.06170079708099365
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,1536,64,0,0.05577759742736817
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1536,1,0,0.7231328010559082
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1536,4,0,0.22284319400787353
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1536,2,0,0.39550879001617434
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1536,8,0,0.13632479906082154
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1536,16,0,0.09470400214195251
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1536,32,0,0.07221919894218445
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,1536,64,0,0.06604639887809753
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,2048,1,0,0.9532832145690918
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,2048,2,0,0.5163663864135742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,2048,4,0,0.2842015981674194
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,2048,16,0,0.111080002784729
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,2048,8,0,0.17301440238952637
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,2048,32,0,0.07643839716911316
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,2048,64,0,0.06992319822311402
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,2048,1,0,1.0250559806823731
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,2048,2,0,0.5562607765197753
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,2048,8,0,0.18825440406799315
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,2048,4,0,0.3149359941482544
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,2048,16,0,0.12401759624481201
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,2048,32,0,0.08997759819030762
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,2048,64,0,0.08192319869995117
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,3072,1,0,1.7316703796386719
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,3072,2,0,0.899665641784668
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,3072,4,0,0.4998960018157959
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,3072,8,0,0.28423199653625486
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,3072,16,0,0.17961280345916747
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,3072,32,0,0.1235152006149292
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,3072,64,0,0.0983951985836029
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,3072,1,0,1.7392160415649414
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,3072,4,0,0.5151023864746094
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,3072,2,0,0.9369808197021484
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,3072,8,0,0.3046112060546875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,3072,16,0,0.1932639956474304
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,3072,32,0,0.13924800157546996
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,3072,64,0,0.11116000413894653
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,4096,1,0,2.673878479003906
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,4096,8,0,0.4276864051818848
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,4096,4,0,0.7488976001739502
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,4096,2,0,1.3688112258911134
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,4096,16,0,0.26404800415039065
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,4096,32,0,0.17830079793930054
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,4096,64,0,0.12524640560150146
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,4096,1,0,2.622323226928711
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,4096,4,0,0.7525008201599122
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,4096,8,0,0.43491039276123045
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,4096,2,0,1.3824543952941895
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,4096,16,0,0.2754751920700073
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,4096,64,0,0.1409216046333313
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,4096,32,0,0.1910320043563843
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,6144,1,0,6.7504112243652346
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,6144,4,0,1.3778143882751466
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,6144,2,0,2.8432943344116213
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,6144,8,0,0.7677120208740235
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,6144,16,0,0.4606815814971924
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,6144,32,0,0.29330880641937257
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,6144,64,0,0.2103600025177002
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,6144,1,0,4.9480033874511715
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,6144,2,0,2.505588722229004
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,6144,4,0,1.3294384002685546
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,6144,8,0,0.7611983776092529
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,6144,16,0,0.45683841705322265
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,6144,32,0,0.30341598987579343
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,6144,64,0,0.22277278900146485
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,8192,8,0,1.198521614074707
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,8192,4,0,2.4656240463256838
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,8192,1,0,12.652788543701172
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,8192,2,0,5.441888046264649
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,8192,16,0,0.6902448177337647
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,8192,32,0,0.44054880142211916
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,8192,64,0,0.3083359956741333
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,8192,4,0,2.0751392364501955
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,8192,1,0,8.786702728271484
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,8192,8,0,1.1531503677368165
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,8192,2,0,4.040280151367187
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,8192,16,0,0.6818927764892578
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,8192,32,0,0.4363391876220703
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,8192,64,0,0.31699039936065676
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1,1,0,0.03908640146255493
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1,2,0,0.026131200790405273
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1,4,0,0.02133920043706894
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1,8,0,0.016758400201797485
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1,16,0,0.01494400054216385
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1,32,0,0.01494400054216385
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,1,64,0,0.014851200580596923
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1,1,0,0.04147520065307617
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1,2,0,0.031035199761390686
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1,4,0,0.02786880135536194
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1,8,0,0.022961600124835967
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1,16,0,0.02300959974527359
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1,32,0,0.023027199506759643
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,1,64,0,0.02305919975042343
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,16,1,0,0.03948639929294586
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,16,2,0,0.027003198862075806
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,16,4,0,0.020865599811077117
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,16,8,0,0.016787199676036833
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,16,16,0,0.01682240068912506
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,16,32,0,0.014753599464893342
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,16,64,0,0.014726400375366211
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,16,1,0,0.04954720139503479
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,16,2,0,0.03528000116348266
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,16,4,0,0.02905600070953369
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,16,8,0,0.02475520074367523
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,16,16,0,0.02295359969139099
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,16,32,0,0.022838400304317476
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,16,64,0,0.02314079999923706
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,32,1,0,0.04548160135746002
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,32,2,0,0.031913599371910094
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,32,4,0,0.022814400494098663
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,32,8,0,0.018969599902629853
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,32,16,0,0.01610559970140457
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,32,32,0,0.016899199783802034
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,32,64,0,0.016764800250530242
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,32,1,0,0.05618720054626465
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,32,2,0,0.04133920073509216
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,32,4,0,0.031118398904800414
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,32,16,0,0.025012800097465517
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,32,32,0,0.02494560033082962
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,32,8,0,0.025672000646591187
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,32,64,0,0.023014399409294128
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,64,1,0,0.05999839901924133
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,64,2,0,0.037299200892448425
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,64,4,0,0.027031999826431275
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,64,8,0,0.020878399908542632
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,64,16,0,0.017366400361061095
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,64,32,0,0.01679359972476959
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,64,64,0,0.016840000450611115
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,64,1,0,0.07614399790763855
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,64,2,0,0.04955199956893921
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,64,4,0,0.03710399866104126
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,64,8,0,0.027153599262237548
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,64,16,0,0.027166399359703063
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,64,32,0,0.025092801451683043
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,64,64,0,0.02499680072069168
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,128,1,0,0.0923200011253357
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,128,2,0,0.05552319884300232
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,128,4,0,0.0352400004863739
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,128,8,0,0.02502079904079437
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,128,16,0,0.02173759937286377
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,128,32,0,0.019204799830913544
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,128,64,0,0.01964160054922104
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,128,1,0,0.11719039678573609
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,128,2,0,0.07018399834632874
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,128,4,0,0.04559679925441742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,128,8,0,0.03471679985523224
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,128,16,0,0.02922079861164093
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,128,32,0,0.027208000421524048
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,128,64,0,0.02707040011882782
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,256,1,0,0.16194560527801513
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,256,2,0,0.09279999732971192
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,256,4,0,0.05586400032043457
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,256,8,0,0.03723039925098419
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,256,16,0,0.031159999966621398
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,256,32,0,0.027054399251937866
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,256,64,0,0.026984000205993654
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,256,1,0,0.20341598987579346
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,256,2,0,0.1166767954826355
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,256,4,0,0.07014880180358887
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,256,8,0,0.04769119918346405
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,256,16,0,0.04130240082740784
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,256,32,0,0.03716799914836884
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,256,64,0,0.03534879982471466
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,512,1,0,0.3370703935623169
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,512,2,0,0.18442239761352539
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,512,4,0,0.1053056001663208
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,512,8,0,0.0657920002937317
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,512,16,0,0.04553279876708984
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,512,32,0,0.03929600119590759
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,512,64,0,0.03519040048122406
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,512,1,0,0.40377440452575686
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,512,2,0,0.22468960285186768
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,512,4,0,0.13137919902801515
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,512,8,0,0.08242400288581848
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,512,16,0,0.05965920090675354
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,512,32,0,0.05163840055465698
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,512,64,0,0.04546720087528229
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1024,1,0,0.7475503921508789
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1024,2,0,0.4008175849914551
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1024,4,0,0.22684800624847412
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1024,8,0,0.1359760046005249
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1024,16,0,0.08914719820022583
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1024,32,0,0.06398079991340637
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,1024,64,0,0.05791360139846802
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1024,1,0,0.8636704444885254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1024,2,0,0.4658383846282959
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1024,4,0,0.26367199420928955
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1024,8,0,0.16064319610595704
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1024,16,0,0.10863200426101685
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1024,32,0,0.07835839986801148
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,1024,64,0,0.07008960247039794
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1536,1,0,1.2633935928344726
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1536,4,0,0.3665184020996094
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1536,2,0,0.6747024059295654
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1536,8,0,0.2170464038848877
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1536,16,0,0.13983360528945923
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1536,32,0,0.09670720100402833
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,1536,64,0,0.0783407986164093
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1536,1,0,1.3854080200195313
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1536,2,0,0.7457568168640136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1536,8,0,0.24783520698547362
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1536,16,0,0.16037440299987793
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1536,4,0,0.41266398429870604
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1536,32,0,0.11721279621124267
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,1536,64,0,0.09273759722709655
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,2048,1,0,1.844416046142578
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,2048,2,0,0.972054386138916
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,2048,4,0,0.5395760059356689
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,2048,8,0,0.3104464054107666
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,2048,16,0,0.19288320541381837
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,2048,32,0,0.1349776029586792
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,2048,64,0,0.09974719882011414
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,2048,1,0,1.9971391677856445
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,2048,2,0,1.054588794708252
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,2048,4,0,0.5797664165496826
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,2048,8,0,0.3418495893478394
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,2048,16,0,0.2178112030029297
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,2048,32,0,0.15407999753952026
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,2048,64,0,0.11804319620132446
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,3072,1,0,3.5416801452636717
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,3072,2,0,1.7620080947875976
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,3072,8,0,0.5242479801177978
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,3072,4,0,0.9392512321472168
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,3072,16,0,0.3231215953826904
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,3072,64,0,0.15700639486312867
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,3072,32,0,0.21526079177856444
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,3072,1,0,3.413030242919922
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,3072,2,0,1.7993791580200196
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,3072,4,0,0.9695856094360351
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,3072,16,0,0.3469583988189697
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,3072,8,0,0.5628623962402344
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,3072,64,0,0.18052959442138672
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,3072,32,0,0.23802399635314941
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,4096,2,0,3.0077295303344727
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,4096,1,0,6.343604660034179
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,4096,4,0,1.4125311851501465
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,4096,8,0,0.799399995803833
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,4096,16,0,0.47467842102050783
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,4096,32,0,0.307207989692688
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,4096,64,0,0.22506721019744874
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,4096,4,0,1.4334832191467286
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,4096,2,0,2.6873552322387697
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,4096,8,0,0.8091936111450195
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,4096,1,0,5.17947998046875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,4096,32,0,0.3295743942260742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,4096,16,0,0.4964735984802246
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,4096,64,0,0.248419189453125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1,1,0,0.0573248028755188
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1,2,0,0.034955200552940366
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1,4,0,0.02497439980506897
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1,8,0,0.02099840044975281
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1,16,0,0.017025600373744964
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1,32,0,0.014819200336933135
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,1,64,0,0.014847999811172486
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1,1,0,0.06008800268173218
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1,2,0,0.03978239893913269
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1,4,0,0.031121599674224853
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1,8,0,0.027185600996017457
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1,16,0,0.025016000866889952
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1,32,0,0.022942399978637694
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,1,64,0,0.02298559993505478
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,16,1,0,0.06616640090942383
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,16,2,0,0.03956800103187561
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,16,4,0,0.02760159969329834
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,16,8,0,0.020843200385570526
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,16,16,0,0.01679999977350235
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,16,32,0,0.0168272003531456
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,16,64,0,0.016847999393939973
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,16,1,0,0.07413600087165832
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,16,2,0,0.04960640072822571
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,16,4,0,0.03722879886627197
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,16,8,0,0.02908639907836914
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,16,16,0,0.02500160038471222
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,16,32,0,0.02496960014104843
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,16,64,0,0.02327360063791275
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,32,1,0,0.08027999997138976
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,32,2,0,0.04577760100364685
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,32,4,0,0.031214401125907898
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,32,8,0,0.02298240065574646
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,32,16,0,0.01679839938879013
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,32,32,0,0.016908800601959227
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,32,64,0,0.016752000153064727
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,32,1,0,0.09288319945335388
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,32,2,0,0.05586400032043457
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,32,4,0,0.04147520065307617
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,32,8,0,0.031225600838661195
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,32,16,0,0.027160000801086426
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,32,32,0,0.025080001354217528
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,32,64,0,0.02304159998893738
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,64,1,0,0.11164159774780273
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,64,2,0,0.06198559999465943
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,64,4,0,0.03938559889793396
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,64,8,0,0.02709760069847107
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,64,16,0,0.020927999913692475
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,64,32,0,0.020868800580501556
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,64,64,0,0.01879040002822876
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,64,1,0,0.1366368055343628
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,64,2,0,0.07835360169410706
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,64,4,0,0.051801598072052
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,64,8,0,0.037243199348449704
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,64,16,0,0.02924000024795532
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,64,32,0,0.02707839906215668
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,64,64,0,0.027091199159622194
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,128,1,0,0.16892319917678833
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,128,2,0,0.0946399986743927
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,128,4,0,0.05581279993057251
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,128,8,0,0.03741439878940582
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,128,16,0,0.0272271990776062
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,128,32,0,0.025009599328041077
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,128,64,0,0.022969600558280946
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,128,1,0,0.21631360054016113
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,128,2,0,0.12126239538192748
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,128,4,0,0.07214400172233582
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,128,8,0,0.049614399671554565
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,128,16,0,0.03935680091381073
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,128,32,0,0.0331712007522583
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,128,64,0,0.03127039968967438
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,256,1,0,0.3074079990386963
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,256,2,0,0.1680016040802002
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,256,4,0,0.09684320092201233
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,256,8,0,0.05965120196342468
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,256,16,0,0.04323999881744385
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,256,32,0,0.035417601466178894
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,256,64,0,0.033160001039505005
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,256,1,0,0.3811424016952515
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,256,4,0,0.1248047947883606
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,256,2,0,0.2112031936645508
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,256,8,0,0.07822399735450744
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,256,16,0,0.054795199632644655
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,256,32,0,0.047727999091148374
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,256,64,0,0.04195840060710907
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,512,1,0,0.6438560009002685
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,512,2,0,0.34539361000061036
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,512,4,0,0.19584640264511108
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,512,8,0,0.11893600225448608
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,512,16,0,0.07819520235061646
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,512,32,0,0.05580959916114807
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,512,64,0,0.05069440007209778
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,512,1,0,0.7779632091522217
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,512,2,0,0.41939678192138674
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,512,4,0,0.23994081020355223
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,512,8,0,0.14602400064468385
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,512,32,0,0.07204959988594055
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,512,16,0,0.0990831971168518
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,512,64,0,0.06418079733848572
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1024,2,0,0.7788352012634278
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1024,1,0,1.4518783569335938
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1024,4,0,0.4242432117462158
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1024,8,0,0.2504096031188965
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1024,16,0,0.1599679946899414
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1024,32,0,0.11303039789199829
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,1024,64,0,0.08442559838294983
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1024,1,0,1.6744112014770507
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1024,4,0,0.4926752090454102
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1024,2,0,0.8922592163085937
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1024,8,0,0.291265606880188
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1024,16,0,0.1885424017906189
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1024,32,0,0.13590079545974731
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,1024,64,0,0.10706239938735962
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1536,1,0,2.4526880264282225
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1536,2,0,1.2977359771728516
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1536,4,0,0.7018400192260742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1536,8,0,0.40492801666259765
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1536,16,0,0.2505199909210205
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,1536,64,0,0.132804799079895
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1536,1,0,2.7224752426147463
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1536,32,0,0.17487360239028932
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1536,2,0,1.4257807731628418
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1536,4,0,0.7848752021789551
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1536,8,0,0.4581759929656982
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1536,16,0,0.29083681106567383
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1536,32,0,0.20346879959106445
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,1536,64,0,0.1599984049797058
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,2048,2,0,1.9153039932250977
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,2048,1,0,3.861430358886719
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,2048,4,0,1.0182064056396485
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,2048,8,0,0.5776800155639649
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,2048,16,0,0.3547568082809448
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,2048,32,0,0.24165759086608887
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,2048,64,0,0.1803663969039917
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,2048,8,0,0.6375360012054443
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,2048,4,0,1.1197999954223632
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,2048,2,0,2.0560319900512694
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,2048,1,0,3.9005550384521483
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,2048,32,0,0.2770544052124023
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,2048,16,0,0.3980463981628418
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,2048,64,0,0.21245119571685792
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,1,1,0,0.09282240271568298
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,1,2,0,0.0537280023097992
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,1,4,0,0.03315359950065613
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,1,8,0,0.025088000297546386
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,1,16,0,0.02099040001630783
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,1,32,0,0.016700799763202667
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,1,64,0,0.014731200039386749
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,1,1,0,0.0987504005432129
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,1,2,0,0.06005120277404785
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,1,4,0,0.04141440093517303
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,1,8,0,0.03323520123958588
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,1,16,0,0.02754240036010742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,1,32,0,0.023089599609375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,1,64,0,0.022998400032520294
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,16,2,0,0.06621919870376587
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,16,1,0,0.11691839694976806
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,16,16,0,0.02093600034713745
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,16,8,0,0.02887200117111206
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,16,4,0,0.0395440012216568
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,16,32,0,0.018750399351119995
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,16,64,0,0.016899199783802034
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,16,1,0,0.13040800094604493
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,16,2,0,0.07608799934387207
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,16,4,0,0.04958080053329468
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,16,8,0,0.03727039992809296
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,16,16,0,0.029084798693656922
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,16,32,0,0.025275200605392456
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,16,64,0,0.02508319914340973
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,32,1,0,0.14927200078964234
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,32,2,0,0.08068959712982178
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,32,4,0,0.04751999974250794
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,32,8,0,0.03324480056762695
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,32,16,0,0.025110399723052977
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,32,32,0,0.020953600108623505
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,32,1,0,0.1659343957901001
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,32,64,0,0.018467199802398682
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,32,2,0,0.09445440173149108
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,32,8,0,0.04340960085391998
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,32,4,0,0.05987840294837952
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,32,16,0,0.03334240019321442
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,32,32,0,0.02739039957523346
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,32,64,0,0.027065598964691163
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,64,1,0,0.20945439338684083
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,64,2,0,0.11343519687652588
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,64,4,0,0.06454240083694458
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,64,8,0,0.04338720142841339
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,64,16,0,0.0314191997051239
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,64,32,0,0.025043201446533204
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,64,64,0,0.02300799936056137
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,64,1,0,0.2500864028930664
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,64,2,0,0.13893760442733766
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,64,4,0,0.08253920078277588
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,64,8,0,0.05585600137710571
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,64,16,0,0.04114879965782166
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,64,32,0,0.03342719972133636
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,64,64,0,0.031206399202346802
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,128,1,0,0.3227263927459717
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,128,2,0,0.17714879512786866
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,128,8,0,0.062001597881317136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,128,4,0,0.1007423996925354
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,128,16,0,0.043459200859069826
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,128,32,0,0.035278400778770445
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,128,64,0,0.029185599088668822
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,128,1,0,0.40174241065979005
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,128,2,0,0.22262558937072754
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,128,4,0,0.12650400400161743
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,128,8,0,0.08111519813537597
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,128,32,0,0.04560160040855408
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,128,16,0,0.05652639865875244
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,128,64,0,0.04144960045814514
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,256,1,0,0.5905327796936035
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,256,2,0,0.31781280040740967
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,256,4,0,0.17885600328445433
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,256,8,0,0.1094480037689209
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,256,16,0,0.07225279808044434
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,256,32,0,0.052902400493621826
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,256,64,0,0.04771519899368286
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,256,1,0,0.7302015781402588
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,256,2,0,0.39773120880126955
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,256,4,0,0.22436161041259767
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,256,16,0,0.09491519927978516
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,256,8,0,0.13994879722595216
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,256,32,0,0.06987040042877198
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,256,64,0,0.06308799982070923
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,512,1,0,1.2630767822265625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,512,2,0,0.6655648231506348
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,512,4,0,0.37005441188812255
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,512,8,0,0.2163151979446411
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,512,16,0,0.1408400058746338
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,512,32,0,0.10083999633789062
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,512,64,0,0.07874559760093688
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,512,1,0,1.5031135559082032
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,512,2,0,0.8065759658813476
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,512,4,0,0.4474976062774658
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,512,8,0,0.2690112113952637
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,512,16,0,0.1747488021850586
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,512,32,0,0.12769919633865356
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,512,64,0,0.10103039741516114
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,1024,2,0,1.4909791946411133
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,1024,1,0,2.8638671875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,1024,4,0,0.8238991737365723
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,1024,8,0,0.46933279037475584
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,1024,16,0,0.2937632083892822
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,1024,32,0,0.2053760051727295
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,1024,64,0,0.15976159572601317
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,1024,1,0,3.283603286743164
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,1024,4,0,0.9510208129882812
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,1024,8,0,0.5519936084747314
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,1024,2,0,1.7235679626464844
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,1024,32,0,0.2479408025741577
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,1024,16,0,0.34845120906829835
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,1,1,0,0.18583199977874756
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,1024,64,0,0.19413440227508544
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,1,2,0,0.09380480051040649
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,1,8,0,0.033395200967788696
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,1,4,0,0.05394560098648071
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,1,16,0,0.025166401267051698
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,1,32,0,0.02096160054206848
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,1,64,0,0.016872000694274903
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,1,1,0,0.17482240200042726
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,1,2,0,0.09897119998931884
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,1,4,0,0.060017597675323484
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,1,8,0,0.04135040044784546
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,1,16,0,0.031488001346588135
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,1,32,0,0.027011200785636902
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,1,64,0,0.025166401267051698
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,16,1,0,0.2247215986251831
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,16,2,0,0.11903200149536133
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,16,4,0,0.06944640278816223
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,16,8,0,0.04342080056667328
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,16,16,0,0.029179200530052185
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,16,64,0,0.018875199556350707
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,16,32,0,0.023097600042819976
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,16,2,0,0.129584002494812
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,16,1,0,0.24149599075317382
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,16,4,0,0.07837759852409362
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,16,8,0,0.051583999395370485
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,16,16,0,0.03726080060005188
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,16,32,0,0.03128319978713989
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,16,64,0,0.02720479965209961
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,32,1,0,0.2844304084777832
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,32,4,0,0.08328800201416016
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,32,8,0,0.050425601005554196
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,32,16,0,0.03733600080013275
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,32,2,0,0.15230879783630372
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,32,32,0,0.02712959945201874
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,32,64,0,0.022961600124835967
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,32,1,0,0.3118848085403442
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,32,2,0,0.17004480361938476
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,32,4,0,0.09888160228729248
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,32,8,0,0.06417919993400574
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,32,32,0,0.03739199936389923
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,32,64,0,0.03121280074119568
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,32,16,0,0.045553600788116454
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,64,1,0,0.4031263828277588
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,64,2,0,0.21352479457855225
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,64,4,0,0.12139999866485596
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,64,8,0,0.07020320296287537
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,64,16,0,0.04762240052223206
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,64,32,0,0.03735840022563934
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,64,64,0,0.03104960024356842
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,64,1,0,0.4757664203643799
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,64,2,0,0.256932806968689
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,64,4,0,0.14604480266571046
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,64,8,0,0.08873599767684937
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,64,16,0,0.06178560256958008
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,64,32,0,0.047651201486587524
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,64,64,0,0.04145439863204956
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,128,2,0,0.3338752031326294
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,128,1,0,0.6223328113555908
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,128,4,0,0.1867743968963623
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,128,8,0,0.1112671971321106
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,128,16,0,0.07590559720993043
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,128,32,0,0.055820798873901366
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,128,64,0,0.04550240039825439
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,128,1,0,0.7757855892181397
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,128,2,0,0.4179984092712402
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,128,8,0,0.14178240299224854
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,128,4,0,0.23624000549316407
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,128,16,0,0.09860479831695557
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,128,32,0,0.07024800181388854
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,128,64,0,0.05983999967575073
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,256,1,0,1.1440112113952636
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,256,2,0,0.6121568202972412
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,256,4,0,0.3426032066345215
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,256,8,0,0.20278239250183105
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,256,32,0,0.09478719830513001
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,256,16,0,0.13200160264968872
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,256,64,0,0.07576159834861755
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,256,1,0,1.4184224128723144
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,256,2,0,0.7596384048461914
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,256,4,0,0.42284321784973145
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,256,8,0,0.2541919946670532
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,256,16,0,0.16862399578094484
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,256,32,0,0.12140640020370483
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,256,64,0,0.09912319779396057
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,512,1,0,2.478713607788086
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,512,4,0,0.7167295932769775
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,512,8,0,0.4127664089202881
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,512,2,0,1.2931424140930177
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,512,16,0,0.2642672061920166
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,512,32,0,0.18691200017929077
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,512,64,0,0.14583200216293335
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,512,1,0,2.9642303466796873
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,512,2,0,1.5716496467590333
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,512,8,0,0.5036143779754638
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,512,4,0,0.8634448051452637
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,512,16,0,0.3248863935470581
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,512,32,0,0.23222720623016357
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,512,64,0,0.18329919576644899
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,1,1,0,0.3351903915405273
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,1,2,0,0.17476160526275636
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,1,4,0,0.09520000219345093
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,1,16,0,0.035411199927330016
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,1,8,0,0.05581600069999695
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,1,32,0,0.025086399912834168
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,1,64,0,0.01903360038995743
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,1,1,0,0.3306623935699463
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,1,4,0,0.098990398645401
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,1,2,0,0.1756767988204956
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,1,8,0,0.06032480001449585
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,1,16,0,0.041366401314735415
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,1,32,0,0.03117760121822357
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,1,64,0,0.02720640003681183
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,16,1,0,0.437824010848999
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,16,2,0,0.23025760650634766
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,16,4,0,0.12182559967041015
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,16,8,0,0.07006239891052246
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,16,16,0,0.04576480090618133
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,16,32,0,0.03150559961795807
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,16,64,0,0.025460800528526305
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,16,1,0,0.4488671779632568
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,16,2,0,0.2439568042755127
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,16,4,0,0.1341088056564331
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,16,8,0,0.08042560219764709
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,16,16,0,0.054016000032424925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,16,32,0,0.04141919910907745
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,16,64,0,0.035339200496673585
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,32,1,0,0.5452640056610107
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,32,2,0,0.2904239892959595
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,32,4,0,0.15986080169677735
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,32,8,0,0.09015039801597595
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,32,16,0,0.05572800040245056
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,32,32,0,0.041577601432800294
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,32,64,0,0.03336159884929657
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,32,1,0,0.5926527976989746
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,32,2,0,0.31795520782470704
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,32,4,0,0.17618720531463622
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,32,8,0,0.10481760501861573
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,32,16,0,0.07012959718704223
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,32,32,0,0.05281919836997986
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,32,64,0,0.043377599120140074
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,64,2,0,0.4131167888641357
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,64,4,0,0.22518720626831054
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,64,1,0,0.7833983898162842
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,64,16,0,0.08449919819831848
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,64,8,0,0.13175679445266725
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,64,32,0,0.05990880131721497
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,64,64,0,0.04961279928684235
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,64,1,0,0.9190735816955566
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,64,2,0,0.4868192195892334
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,64,4,0,0.2724720001220703
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,64,8,0,0.16040960550308228
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,64,16,0,0.10505599975585937
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,64,32,0,0.07734079957008362
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,64,64,0,0.06422240138053895
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,128,1,0,1.2120800018310547
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,128,2,0,0.645900821685791
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,128,4,0,0.35405120849609373
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,128,8,0,0.21016321182250977
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,128,16,0,0.1334768056869507
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,128,32,0,0.09681280255317688
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,128,64,0,0.07620319724082947
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,128,1,0,1.5117759704589844
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,128,2,0,0.7991600036621094
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,128,4,0,0.44536800384521485
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,128,8,0,0.26507999897003176
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,128,16,0,0.17163039445877076
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,128,32,0,0.12537120580673217
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,128,64,0,0.09900320172309876
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,256,2,0,1.1956751823425293
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,256,1,0,2.243951988220215
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,256,4,0,0.6537360191345215
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,256,8,0,0.3871056079864502
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,256,16,0,0.24657759666442872
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,256,32,0,0.1765231966972351
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,256,64,0,0.13965280055999757
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,256,1,0,2.8078079223632812
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,256,2,0,1.4831727981567382
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,256,4,0,0.8137552261352539
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,256,8,0,0.4810351848602295
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,256,16,0,0.309988808631897
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,256,32,0,0.22595679759979248
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,1,1,0,0.01685439944267273
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,256,64,0,0.1790671944618225
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1,2,0,0.01501920074224472
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1,4,0,0.01475519984960556
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1,8,0,0.014795200526714325
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1,16,0,0.015123200416564942
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1,32,0,0.015046399831771851
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1,64,0,0.014868800342082978
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,1,1,0,0.02333119958639145
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1,2,0,0.0231904000043869
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1,4,0,0.023073600232601167
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1,8,0,0.023127999901771546
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1,16,0,0.021073600649833678
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1,32,0,0.022414399683475493
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1,64,0,0.023022399842739107
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,16,1,0,0.01703680008649826
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,16,2,0,0.016726399958133697
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,16,4,0,0.016784000396728515
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,16,8,0,0.014963200688362122
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,16,16,0,0.014903999865055084
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,16,32,0,0.014868800342082978
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,16,1,0,0.023545600473880768
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,16,64,0,0.014988799393177033
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,16,2,0,0.023104000091552734
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,16,4,0,0.020904000103473663
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,16,8,0,0.02314079999923706
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,16,16,0,0.021057599782943727
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,16,32,0,0.02303680032491684
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,16,64,0,0.022968000173568724
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,32,1,0,0.019814400374889372
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,32,2,0,0.01685280054807663
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,32,4,0,0.016763199865818024
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,32,8,0,0.016838400065898894
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,32,16,0,0.01496639996767044
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,32,32,0,0.014921599626541137
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,32,64,0,0.015080000460147857
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,32,1,0,0.024991999566555022
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,32,2,0,0.02306720018386841
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,32,4,0,0.02374400049448013
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,32,8,0,0.023080000281333925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,32,16,0,0.022951999306678773
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,32,32,0,0.02305919975042343
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,64,1,0,0.020761600136756896
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,32,64,0,0.023236800730228425
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,64,2,0,0.016836799681186676
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,64,4,0,0.016737599670886994
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,64,8,0,0.016787199676036833
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,64,32,0,0.014910399913787842
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,64,16,0,0.014979200065135955
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,64,64,0,0.016979199647903443
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,64,1,0,0.029212799668312073
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,64,2,0,0.025407999753952026
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,64,4,0,0.023073600232601167
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,64,8,0,0.02502720057964325
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,64,16,0,0.02298559993505478
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,64,32,0,0.02298080027103424
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,64,64,0,0.024009600281715393
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,128,1,0,0.023078399896621703
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,128,2,0,0.018723200261592864
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,128,4,0,0.016812799870967864
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,128,8,0,0.0171424001455307
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,128,16,0,0.01677280068397522
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,128,64,0,0.014913600683212281
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,128,32,0,0.0171984001994133
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,128,1,0,0.033267199993133545
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,128,2,0,0.029139199852943422
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,128,4,0,0.027063998579978942
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,128,8,0,0.025148800015449523
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,128,16,0,0.025009599328041077
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,128,32,0,0.023048000037670137
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,128,64,0,0.023104000091552734
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,256,1,0,0.031206399202346802
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,256,2,0,0.02510400116443634
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,256,4,0,0.022915199398994446
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,256,8,0,0.021003200113773345
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,256,32,0,0.021006399393081666
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,256,16,0,0.019047999382019044
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,256,64,0,0.01892160028219223
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,256,1,0,0.04349440038204193
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,256,2,0,0.035288000106811525
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,256,4,0,0.031097599864006044
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,256,8,0,0.026848000288009644
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,256,16,0,0.02714560031890869
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,256,32,0,0.02707839906215668
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,256,64,0,0.027160000801086426
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,512,1,0,0.05600799918174744
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,512,2,0,0.03540799915790558
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,512,4,0,0.029399999976158143
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,512,8,0,0.025084799528121947
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,512,16,0,0.02489120066165924
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,512,32,0,0.022945599257946016
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,512,64,0,0.023179200291633607
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,512,1,0,0.06894879937171935
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,512,2,0,0.04554080069065094
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,512,4,0,0.03742879927158356
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,512,8,0,0.03345440030097961
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,512,16,0,0.03128480017185211
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,512,32,0,0.02919679880142212
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,512,64,0,0.031163200736045837
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,1024,1,0,0.11476000547409057
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1024,4,0,0.0414000004529953
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1024,8,0,0.037385600805282596
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1024,2,0,0.0661791980266571
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1024,16,0,0.03331040143966675
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1024,32,0,0.03132160007953644
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1024,64,0,0.02916960120201111
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,1024,1,0,0.1338047981262207
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1024,2,0,0.07996640205383301
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1024,4,0,0.05139840245246887
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1024,8,0,0.0441103994846344
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1024,16,0,0.03933599889278412
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1024,32,0,0.03939520120620728
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1024,64,0,0.037150400876998904
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,1536,1,0,0.18431040048599243
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1536,2,0,0.10513919591903687
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1536,4,0,0.06583520174026489
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1536,8,0,0.045480000972747806
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1536,16,0,0.04101920127868652
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1536,32,0,0.039350399374961854
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1536,64,0,0.03739199936389923
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1536,2,0,0.12110400199890137
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1536,4,0,0.07619680166244507
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1536,8,0,0.055088001489639285
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,1536,1,0,0.2055408000946045
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1536,16,0,0.04848800003528595
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1536,64,0,0.045579200983047484
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1536,32,0,0.045480000972747806
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,2048,1,0,0.2637264013290405
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,2048,2,0,0.15
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,2048,4,0,0.08943039774894715
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,2048,8,0,0.056796801090240476
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,2048,16,0,0.04966239929199219
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,2048,32,0,0.04540160000324249
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,2048,64,0,0.04567359983921051
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,2048,2,0,0.1624127984046936
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,2048,1,0,0.28588640689849854
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,2048,4,0,0.100108802318573
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,2048,8,0,0.06430559754371643
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,2048,16,0,0.05785120129585266
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,2048,32,0,0.05187199711799621
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,2048,64,0,0.049700799584388736
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,3072,1,0,0.466431999206543
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,3072,2,0,0.2533584117889404
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,3072,4,0,0.1493008017539978
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,3072,8,0,0.09494879841804504
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,3072,16,0,0.0677727997303009
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,3072,32,0,0.06182079911231995
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,3072,64,0,0.05996479988098145
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,3072,1,0,0.481763219833374
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,3072,2,0,0.2681936025619507
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,3072,4,0,0.1577296018600464
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,3072,8,0,0.09887199997901916
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,3072,16,0,0.07426400184631347
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,3072,32,0,0.06800799965858459
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,3072,64,0,0.06396480202674866
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,4096,1,0,0.7076784133911133
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,4096,2,0,0.38756160736083983
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,4096,4,0,0.2204655885696411
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,4096,8,0,0.13419840335845948
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,4096,16,0,0.08676159977912903
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,4096,32,0,0.07802720069885254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,4096,64,0,0.07439360022544861
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,4096,1,0,0.7010704040527344
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,4096,2,0,0.38619840145111084
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,4096,4,0,0.22289440631866456
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,4096,8,0,0.13782399892807007
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,4096,32,0,0.08265600204467774
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,4096,16,0,0.09270240068435669
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,4096,64,0,0.08008800148963928
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,6144,1,0,1.317568016052246
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,6144,8,0,0.2373647928237915
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,6144,2,0,0.7213583946228027
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,6144,4,0,0.39439361095428466
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,6144,16,0,0.14712159633636473
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,6144,32,0,0.10994399785995483
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,6144,64,0,0.10294400453567505
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,6144,2,0,0.6773439884185791
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,6144,1,0,1.2603216171264648
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,6144,4,0,0.38500800132751467
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,6144,8,0,0.23010239601135254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,6144,16,0,0.14596159458160402
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,6144,32,0,0.11319680213928222
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,6144,64,0,0.10717600584030151
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,8192,2,0,1.117903995513916
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,8192,1,0,2.335683250427246
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,8192,4,0,0.6151855945587158
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,8192,8,0,0.36066720485687254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,8192,16,0,0.22366878986358643
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,8192,32,0,0.1440384030342102
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,8192,64,0,0.1316159963607788
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,8192,1,0,1.9742063522338866
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,8192,2,0,1.0377840042114257
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,8192,4,0,0.582257604598999
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,8192,8,0,0.33585920333862307
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,8192,16,0,0.2159775972366333
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,8192,32,0,0.14175200462341309
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,8192,64,0,0.13367680311203003
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,10240,1,0,3.4911006927490233
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,10240,4,0,0.8746800422668457
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,10240,8,0,0.5103936195373535
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,10240,2,0,1.6526464462280273
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,10240,16,0,0.3078399896621704
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,10240,32,0,0.18856960535049438
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,10240,64,0,0.16499359607696534
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,10240,1,0,2.855739212036133
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,10240,2,0,1.472268772125244
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,10240,4,0,0.801030445098877
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,10240,8,0,0.4662384033203125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,10240,16,0,0.2902976036071777
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,10240,32,0,0.18354719877243042
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,10240,64,0,0.162225604057312
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,12288,2,0,2.262614440917969
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,12288,1,0,6.065288162231445
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,12288,4,0,1.1870991706848144
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,12288,8,0,0.6638368129730224
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,12288,16,0,0.4026815891265869
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,12288,32,0,0.25731520652770995
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,12288,64,0,0.19330559968948363
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,12288,1,0,3.9756431579589844
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,12288,2,0,2.0016319274902346
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,12288,4,0,1.084772777557373
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,12288,8,0,0.6028592109680175
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,12288,16,0,0.36423680782318113
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,12288,32,0,0.24167520999908448
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,12288,64,0,0.1892192006111145
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,16384,8,0,1.0941328048706054
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,16384,2,0,4.654867172241211
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,16384,4,0,1.9971439361572265
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,16384,1,0,11.056964874267578
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,16384,32,0,0.39328479766845703
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,16384,16,0,0.6335599899291993
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,16384,64,0,0.26244640350341797
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,16384,1,0,7.391808319091797
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,16384,4,0,1.7131488800048829
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,16384,2,0,3.2974273681640627
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,16384,8,0,0.9515600204467773
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,16384,16,0,0.5559584140777588
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,16384,32,0,0.36544320583343504
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,16384,64,0,0.24431679248809815
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,32768,8,0,4.7782447814941404
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,32768,4,0,11.226048278808594
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,32768,16,0,2.010492706298828
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,32768,32,0,1.154428768157959
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,32768,2,0,22.794053649902345
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,32768,64,0,0.71048002243042
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,32768,4,0,6.534729766845703
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,32768,1,0,45.5487548828125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,32768,2,0,14.4256103515625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,32768,16,0,1.6649999618530273
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,32768,8,0,3.121855926513672
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,32768,32,0,0.9774271965026855
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,1,1,0,0.022416000068187714
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,32768,64,0,0.6404719829559327
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,32768,1,0,29.967706298828126
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1,2,0,0.0168272003531456
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1,4,0,0.014665600657463074
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1,8,0,0.014791999757289887
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1,16,0,0.016817599534988403
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1,32,0,0.014833599328994751
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1,64,0,0.014655999839305878
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1,2,0,0.023137600719928743
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,1,1,0,0.028932800889015196
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1,8,0,0.023046399652957916
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1,4,0,0.02285120040178299
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1,16,0,0.022940799593925476
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1,32,0,0.020846399664878845
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1,64,0,0.022862400114536285
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,16,1,0,0.021014399826526642
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,16,2,0,0.016864000260829924
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,16,4,0,0.016784000396728515
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,16,8,0,0.015460799634456634
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,16,16,0,0.01669120043516159
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,16,32,0,0.014841599762439728
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,16,64,0,0.014822399616241455
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,16,1,0,0.02910720109939575
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,16,4,0,0.025011199712753295
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,16,2,0,0.02367520034313202
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,16,8,0,0.02259040027856827
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,16,16,0,0.023100799322128295
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,16,32,0,0.022944000363349915
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,16,64,0,0.023160000145435334
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,32,1,0,0.023057599365711213
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,32,2,0,0.016927999258041383
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,32,4,0,0.016947199404239655
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,32,8,0,0.01679999977350235
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,32,16,0,0.01675039976835251
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,32,32,0,0.014913600683212281
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,32,64,0,0.014856000244617463
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,32,1,0,0.030959999561309813
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,32,2,0,0.025076800584793092
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,32,8,0,0.022860799729824067
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,32,16,0,0.02292319983243942
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,32,4,0,0.023075200617313385
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,32,32,0,0.022881600260734557
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,32,64,0,0.023056000471115112
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,64,1,0,0.026998400688171387
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,64,2,0,0.01883520036935806
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,64,4,0,0.01691199988126755
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,64,8,0,0.01701119989156723
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,64,16,0,0.014883199334144592
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,64,32,0,0.016814400255680085
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,64,64,0,0.014764800667762756
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,64,1,0,0.03523359894752502
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,64,2,0,0.02914400100708008
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,64,4,0,0.027060800790786745
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,64,8,0,0.02494720071554184
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,64,16,0,0.024907200038433074
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,64,32,0,0.0231904000043869
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,64,64,0,0.023048000037670137
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,128,1,0,0.03314239978790283
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,128,2,0,0.02311840057373047
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,128,4,0,0.018916800618171692
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,128,8,0,0.01681919991970062
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,128,16,0,0.01669600009918213
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,128,32,0,0.01669120043516159
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,128,64,0,0.016832000017166136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,128,2,0,0.031651198863983154
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,128,1,0,0.04368639886379242
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,128,4,0,0.02911840081214905
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,128,8,0,0.027143999934196472
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,128,16,0,0.025095999240875244
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,128,32,0,0.024937599897384644
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,128,64,0,0.02436159998178482
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,256,1,0,0.049798399209976196
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,256,2,0,0.03115200102329254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,256,4,0,0.02699039876461029
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,256,8,0,0.022284799814224245
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,256,16,0,0.020894399285316466
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,256,32,0,0.018873600661754607
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,256,64,0,0.018961599469184874
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,256,1,0,0.0659600019454956
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,256,2,0,0.04340640008449555
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,256,4,0,0.035209599137306216
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,256,16,0,0.029211199283599852
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,256,8,0,0.029227200150489806
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,256,32,0,0.027033600211143493
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,256,64,0,0.026526400446891786
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,512,1,0,0.09650560021400452
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,512,2,0,0.05577279925346375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,512,4,0,0.03544479906558991
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,512,8,0,0.03054080009460449
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,512,16,0,0.02705279886722565
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,512,32,0,0.025086399912834168
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,512,64,0,0.02470560073852539
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,512,1,0,0.11931840181350709
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,512,2,0,0.06996960043907166
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,512,4,0,0.04567199945449829
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,512,8,0,0.039371201395988466
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,512,16,0,0.033155199885368344
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,512,32,0,0.03312320113182068
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,512,64,0,0.03112800121307373
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,1024,1,0,0.2071120023727417
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1024,2,0,0.11512800455093383
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1024,4,0,0.06853119730949402
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1024,8,0,0.04341920018196106
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1024,16,0,0.0374096006155014
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1024,32,0,0.03342080116271973
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1024,64,0,0.03320319950580597
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,1024,1,0,0.23805439472198486
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1024,2,0,0.1359439969062805
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1024,4,0,0.08036159873008727
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1024,8,0,0.051772797107696535
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1024,16,0,0.04672319889068603
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1024,32,0,0.04158399999141693
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1024,64,0,0.039366400241851805
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,1536,1,0,0.33834240436553953
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1536,2,0,0.18750079870223998
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1536,4,0,0.1083232045173645
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1536,8,0,0.06734399795532227
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1536,16,0,0.04761439859867096
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1536,32,0,0.04340960085391998
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1536,64,0,0.041356799006462094
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,1536,1,0,0.37902240753173827
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1536,2,0,0.20828158855438234
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1536,8,0,0.07644000053405761
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1536,4,0,0.1224943995475769
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1536,16,0,0.057790398597717285
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1536,32,0,0.05069440007209778
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1536,64,0,0.04751839935779571
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,2048,2,0,0.27012479305267334
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,2048,1,0,0.5007472038269043
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,2048,4,0,0.152129602432251
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,2048,8,0,0.09313439726829528
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,2048,16,0,0.05792800188064575
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,2048,32,0,0.05365279912948608
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,2048,64,0,0.04961119890213013
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,2048,2,0,0.2942608118057251
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,2048,1,0,0.5314271926879883
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,2048,4,0,0.16809120178222656
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,2048,8,0,0.10291999578475952
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,2048,16,0,0.06807519793510437
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,2048,32,0,0.05987840294837952
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,2048,64,0,0.05597760081291199
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,3072,2,0,0.47133598327636717
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,3072,4,0,0.25721919536590576
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,3072,8,0,0.15313279628753662
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,3072,1,0,0.8740127563476563
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,3072,16,0,0.09758239984512329
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,3072,32,0,0.0700160026550293
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,3072,64,0,0.06596959829330444
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,3072,1,0,0.8970255851745605
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,3072,4,0,0.2708447933197021
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,3072,2,0,0.4852735996246338
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,3072,8,0,0.1623568058013916
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,3072,16,0,0.1047935962677002
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,3072,64,0,0.07347360253334045
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,3072,32,0,0.07967200279235839
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,4096,1,0,1.3655664443969726
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,4096,2,0,0.712775993347168
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,4096,8,0,0.22417759895324707
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,4096,4,0,0.3936352014541626
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,4096,16,0,0.1421056032180786
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,4096,32,0,0.09196959733963013
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,4096,64,0,0.08238239884376526
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,4096,1,0,1.338259220123291
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,4096,2,0,0.7182032108306885
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,4096,4,0,0.39278080463409426
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,4096,8,0,0.22822721004486085
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,4096,16,0,0.1459328055381775
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,4096,32,0,0.09777600169181824
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,4096,64,0,0.08875359892845154
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,6144,2,0,1.3555439949035644
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,6144,1,0,2.8869552612304688
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,6144,4,0,0.7295455932617188
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,6144,8,0,0.4044816017150879
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,6144,16,0,0.24393599033355712
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,6144,32,0,0.1559615969657898
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,6144,64,0,0.11911360025405884
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,6144,1,0,2.444822311401367
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,6144,4,0,0.6892799854278564
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,6144,8,0,0.3964992046356201
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,6144,2,0,1.284641647338867
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,6144,16,0,0.23543519973754884
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,6144,32,0,0.1561295986175537
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,6144,64,0,0.12434879541397095
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,8192,1,0,5.408276748657227
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,8192,2,0,2.153385543823242
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,8192,4,0,1.1281871795654297
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,8192,8,0,0.6379151821136475
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,8192,16,0,0.37342560291290283
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,8192,32,0,0.23702080249786378
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,8192,64,0,0.1554144024848938
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,8192,2,0,1.994046401977539
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,8192,4,0,1.0580608367919921
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,8192,1,0,3.982614517211914
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,8192,8,0,0.5890975952148437
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,8192,16,0,0.35161280632019043
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,8192,32,0,0.2319024085998535
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,8192,64,0,0.15822399854660035
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,10240,1,0,8.902926635742187
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,10240,8,0,0.9166303634643554
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,10240,4,0,1.6438528060913087
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,10240,2,0,3.8611297607421875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,10240,32,0,0.32517759799957274
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,10240,64,0,0.2052544116973877
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,10240,16,0,0.5135039806365966
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,10240,1,0,5.892015838623047
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,10240,4,0,1.4922687530517578
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,10240,2,0,2.84497127532959
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,10240,8,0,0.8221407890319824
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,10240,16,0,0.48561921119689944
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,10240,32,0,0.30556480884552
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,10240,64,0,0.202673602104187
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,12288,4,0,2.2945135116577147
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,12288,8,0,1.2247023582458496
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,12288,2,0,6.1010894775390625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,12288,1,0,12.893763732910156
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,12288,16,0,0.6934271812438965
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,12288,32,0,0.4179855823516846
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,12288,64,0,0.2733360052108765
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,12288,4,0,2.0206064224243163
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,12288,2,0,4.131615829467774
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,12288,8,0,1.0885231971740723
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,12288,1,0,8.577611541748047
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,12288,16,0,0.6327184200286865
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,12288,64,0,0.26362080574035646
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,12288,32,0,0.38987839221954346
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,16384,8,0,2.037124824523926
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,16384,4,0,4.753955078125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,16384,2,0,10.8149169921875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,16384,16,0,1.1070688247680665
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,16384,32,0,0.6477871894836426
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,16384,1,0,22.697637939453124
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,16384,64,0,0.4190832138061523
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,16384,4,0,3.490678405761719
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,16384,8,0,1.744099235534668
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,16384,2,0,7.574779510498047
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,16384,1,0,15.707522583007812
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,16384,32,0,0.5890736103057861
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,16384,16,0,0.9659520149230957
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,16384,64,0,0.3881727933883667
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,32768,8,0,11.323628997802734
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,32768,4,0,21.624472045898436
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,32768,16,0,4.412054443359375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,32768,32,0,1.9830623626708985
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,32768,64,0,1.1730719566345216
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,32768,2,0,45.62594299316406
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,32768,4,0,13.793276977539062
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,32768,2,0,30.596588134765625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,32768,8,0,6.661070251464844
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,32768,16,0,3.3814910888671874
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,32768,32,0,1.72326717376709
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,32768,64,0,1.033510398864746
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,1,1,0,0.029227200150489806
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,32768,1,0,57.90132446289063
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,32768,1,0,101.28326416015625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1,2,0,0.02300799936056137
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1,4,0,0.01676799952983856
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1,8,0,0.016387200355529784
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1,16,0,0.01679839938879013
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1,32,0,0.01483200043439865
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1,64,0,0.01763039976358414
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,1,1,0,0.03523040115833283
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1,2,0,0.027100801467895508
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1,4,0,0.02512640058994293
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1,8,0,0.025017601251602174
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1,16,0,0.022987200319766997
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1,32,0,0.02309119999408722
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1,64,0,0.025022399425506592
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,16,1,0,0.027118399739265442
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,16,2,0,0.020897600054740905
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,16,4,0,0.016956800222396852
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,16,8,0,0.014905600249767304
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,16,16,0,0.014827199280261993
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,16,32,0,0.016118399798870087
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,16,64,0,0.014849600195884705
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,16,1,0,0.03535679876804352
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,16,2,0,0.02924320101737976
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,16,4,0,0.025070399045944214
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,16,8,0,0.0249439999461174
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,16,16,0,0.02503199875354767
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,16,32,0,0.023127999901771546
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,16,64,0,0.023158399760723113
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,32,1,0,0.031251201033592226
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,32,2,0,0.02285120040178299
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,32,4,0,0.01690080016851425
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,32,8,0,0.01674560010433197
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,32,16,0,0.015038399398326874
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,32,64,0,0.016774399578571318
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,32,32,0,0.014847999811172486
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,32,1,0,0.0393312007188797
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,32,2,0,0.031079998612403868
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,32,4,0,0.025019198656082153
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,32,8,0,0.02503040134906769
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,32,16,0,0.0251120001077652
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,32,32,0,0.024990400671958922
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,32,64,0,0.023054400086402894
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,64,1,0,0.03732160031795502
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,64,2,0,0.027028799057006836
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,64,4,0,0.018982400000095368
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,64,8,0,0.018926399946212768
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,64,16,0,0.016700799763202667
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,64,32,0,0.014772799611091614
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,64,64,0,0.01671839952468872
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,64,1,0,0.04947519898414612
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,64,2,0,0.035452800989151004
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,64,4,0,0.027164798974990845
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,64,16,0,0.023001599311828613
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,64,8,0,0.025094398856163026
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,64,32,0,0.02497279942035675
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,64,64,0,0.022945599257946016
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,128,1,0,0.0518127977848053
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,128,2,0,0.0320576012134552
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,128,4,0,0.023001599311828613
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,128,8,0,0.018991999328136444
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,128,16,0,0.018171200156211854
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,128,32,0,0.016841599345207216
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,128,64,0,0.01675039976835251
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,128,1,0,0.06816639900207519
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,128,2,0,0.04545120000839233
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,128,8,0,0.029174399375915528
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,128,4,0,0.0332399994134903
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,128,16,0,0.027088001370429993
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,128,32,0,0.025019198656082153
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,128,64,0,0.02502399981021881
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,256,1,0,0.08848320245742798
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,256,2,0,0.049744001030921935
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,256,4,0,0.03128640055656433
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,256,8,0,0.025110399723052977
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,256,16,0,0.023081600666046143
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,256,32,0,0.020948800444602966
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,256,64,0,0.020812800526618956
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,256,1,0,0.11271519660949707
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,256,2,0,0.06615200042724609
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,256,4,0,0.043756800889968875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,256,8,0,0.03534559905529022
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,256,16,0,0.03113119900226593
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,256,32,0,0.029019200801849367
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,256,64,0,0.029097598791122437
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,512,1,0,0.17653919458389283
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,512,2,0,0.09862080216407776
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,512,4,0,0.05786240100860596
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,512,8,0,0.03721440136432648
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,512,16,0,0.03136959969997406
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,512,32,0,0.02715519964694977
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,512,64,0,0.027084800601005554
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,512,1,0,0.2153167963027954
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,512,2,0,0.12092800140380859
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,512,4,0,0.07190880179405212
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,512,8,0,0.047603198885917665
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,512,16,0,0.0395359992980957
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,512,32,0,0.03530240058898926
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,512,64,0,0.03328160047531128
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,1024,1,0,0.38542559146881106
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1024,2,0,0.21021759510040283
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1024,4,0,0.12066080570220947
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1024,8,0,0.07177280187606812
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1024,16,0,0.045819199085235594
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1024,32,0,0.04161919951438904
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1024,64,0,0.03735359907150269
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,1024,1,0,0.4430416107177734
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1024,2,0,0.24246559143066407
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1024,4,0,0.13838720321655273
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1024,8,0,0.08464959859848023
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1024,16,0,0.05632640123367309
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1024,32,0,0.050209599733352664
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1024,64,0,0.04364959895610809
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1536,2,0,0.3403759956359863
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,1536,1,0,0.6494095802307129
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1536,4,0,0.19073760509490967
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1536,8,0,0.11356480121612549
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1536,16,0,0.07042080163955688
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1536,32,0,0.051585602760314944
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1536,64,0,0.04839999973773956
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,1536,1,0,0.7133359909057617
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1536,2,0,0.3810703992843628
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1536,4,0,0.21427679061889648
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1536,8,0,0.12646880149841308
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1536,16,0,0.0842848002910614
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1536,32,0,0.06194080114364624
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1536,64,0,0.05587520003318787
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,2048,2,0,0.5040751934051514
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,2048,1,0,0.9436944007873536
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,2048,4,0,0.2719088077545166
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,2048,8,0,0.15995999574661254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,2048,16,0,0.09896960258483886
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,2048,32,0,0.06428959965705872
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,2048,64,0,0.057968002557754514
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,2048,1,0,1.0179743766784668
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,2048,2,0,0.5421648025512695
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,2048,4,0,0.2979327917098999
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,2048,8,0,0.17304480075836182
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,2048,16,0,0.11149760484695434
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,2048,32,0,0.07631199955940246
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,2048,64,0,0.0683359980583191
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,3072,1,0,1.7071151733398438
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,3072,2,0,0.896735954284668
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,3072,4,0,0.4796031951904297
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,3072,8,0,0.2689519882202148
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,3072,16,0,0.16088320016860963
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,3072,32,0,0.10711840391159058
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,3072,64,0,0.08060160279273987
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,3072,1,0,1.720243263244629
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,3072,2,0,0.9059215545654297
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,3072,4,0,0.49975199699401857
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,3072,8,0,0.2830512046813965
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,3072,32,0,0.11360800266265869
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,3072,64,0,0.09026560187339783
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,3072,16,0,0.17335200309753418
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,4096,1,0,2.8154239654541016
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,4096,2,0,1.344337558746338
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,4096,4,0,0.7366735935211182
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,4096,8,0,0.40153918266296384
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,4096,16,0,0.23986399173736572
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,4096,32,0,0.1547104001045227
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,4096,64,0,0.10289599895477294
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,4096,1,0,2.59608154296875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,4096,2,0,1.3431391716003418
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,4096,4,0,0.7300735950469971
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,4096,8,0,0.40891361236572266
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,4096,16,0,0.24549920558929444
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,4096,32,0,0.16139520406723024
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,4096,64,0,0.11310559511184692
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,6144,1,0,5.759371185302735
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,6144,8,0,0.753984022140503
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,6144,4,0,1.3462047576904297
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,6144,2,0,2.6320768356323243
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,6144,64,0,0.1731119990348816
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,6144,16,0,0.4198863983154297
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,6144,32,0,0.2619215965270996
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,6144,1,0,4.894731140136718
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,6144,4,0,1.2969823837280274
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,6144,2,0,2.4391279220581055
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,6144,8,0,0.7197807788848877
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,6144,16,0,0.4177231788635254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,6144,32,0,0.2593967914581299
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,6144,64,0,0.18095999956130981
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,8192,2,0,5.198121643066406
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,8192,4,0,2.3375808715820314
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,8192,8,0,1.1500816345214844
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,8192,1,0,12.72302703857422
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,8192,16,0,0.6578735828399658
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,8192,32,0,0.39071040153503417
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,8192,64,0,0.2584847927093506
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,8192,2,0,3.9488048553466797
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,8192,4,0,2.0035343170166016
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,8192,8,0,1.0805328369140625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,8192,1,0,8.714875030517579
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,8192,16,0,0.6228208065032959
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,8192,32,0,0.38030400276184084
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,8192,64,0,0.25864319801330565
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,10240,4,0,3.591603088378906
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,10240,8,0,1.7417583465576172
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,10240,2,0,8.651686096191407
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,10240,1,0,19.633551025390624
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,10240,16,0,0.9264752388000488
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,10240,32,0,0.5419295787811279
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,10240,64,0,0.35443360805511476
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,10240,4,0,2.8536767959594727
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,10240,8,0,1.5438240051269532
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,10240,2,0,6.295462417602539
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,10240,1,0,13.277621459960937
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,10240,32,0,0.5221856117248536
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,10240,16,0,0.8584320068359375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,10240,64,0,0.3403471946716309
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,12288,4,0,5.728886413574219
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,12288,8,0,2.7692432403564453
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,12288,2,0,12.655068969726562
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,12288,16,0,1.2311136245727539
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,12288,32,0,0.7199503898620605
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,12288,64,0,0.45539679527282717
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,12288,1,0,29.5415771484375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,12288,2,0,9.134563446044922
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,12288,4,0,4.045155334472656
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,12288,8,0,2.0633087158203125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,12288,1,0,17.249249267578126
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,12288,32,0,0.676259183883667
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,12288,16,0,1.127843189239502
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,12288,64,0,0.43270077705383303
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,16384,8,0,5.529150390625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,16384,4,0,11.557179260253907
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,16384,16,0,2.077507209777832
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,16384,32,0,1.1362015724182128
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,16384,2,0,22.746263122558595
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,16384,64,0,0.6978352069854736
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,16384,4,0,7.636507415771485
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,16384,2,0,14.988856506347656
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,16384,1,0,47.811300659179686
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,16384,8,0,3.342793655395508
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,16384,16,0,1.801046371459961
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,16384,32,0,1.0264528274536133
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,16384,1,0,32.73586730957031
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,1,1,0,0.03524320125579834
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,16384,64,0,0.6472847938537598
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1,2,0,0.029182401299476624
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1,8,0,0.01679839938879013
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1,4,0,0.02090719938278198
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1,32,0,0.014723199605941772
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1,16,0,0.016760000586509706
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1,64,0,0.01684480011463165
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1,2,0,0.031113600730895995
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,1,1,0,0.04141919910907745
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1,4,0,0.029076799750328064
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1,8,0,0.02680160105228424
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1,16,0,0.02272319942712784
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1,32,0,0.02311359941959381
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1,64,0,0.02292959988117218
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,16,1,0,0.039473599195480345
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,16,2,0,0.027846398949623107
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,16,4,0,0.02086720019578934
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,16,8,0,0.01690399944782257
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,16,16,0,0.01727519929409027
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,16,32,0,0.014870400726795196
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,16,64,0,0.014708800613880158
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,16,1,0,0.049644801020622256
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,16,2,0,0.035441601276397706
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,16,4,0,0.02905279994010925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,16,8,0,0.025115200877189638
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,16,16,0,0.024988800287246704
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,16,32,0,0.02295359969139099
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,16,64,0,0.025148800015449523
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,32,1,0,0.045623999834060666
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,32,2,0,0.03131519854068756
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,32,4,0,0.021003200113773345
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,32,8,0,0.01748960018157959
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,32,16,0,0.016780799627304076
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,32,32,0,0.01679680049419403
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,32,64,0,0.016945600509643555
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,32,1,0,0.056092798709869385
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,32,2,0,0.04135519862174988
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,32,4,0,0.031188800930976868
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,32,8,0,0.02508159875869751
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,32,16,0,0.02502079904079437
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,32,32,0,0.022896000742912294
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,32,64,0,0.023171199858188628
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,64,1,0,0.05875999927520752
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,64,2,0,0.037396800518035886
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,64,4,0,0.027132800221443175
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,64,8,0,0.01886560022830963
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,64,16,0,0.01682399958372116
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,64,32,0,0.016998399794101716
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,64,64,0,0.01695519983768463
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,64,1,0,0.07617920041084289
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,64,2,0,0.04956159889698029
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,64,4,0,0.03531999886035919
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,64,8,0,0.02725760042667389
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,64,16,0,0.025099200010299683
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,64,32,0,0.024987199902534486
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,64,64,0,0.023108799755573273
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,128,1,0,0.0906000018119812
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,128,2,0,0.05362399816513062
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,128,4,0,0.03322719931602478
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,128,8,0,0.023108799755573273
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,128,16,0,0.020891200006008147
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,128,32,0,0.01891999989748001
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,128,64,0,0.01884640008211136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,128,1,0,0.11716159582138061
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,128,2,0,0.06805920004844665
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,128,4,0,0.043572801351547244
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,128,8,0,0.03326080143451691
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,128,16,0,0.029073598980903625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,128,32,0,0.027158400416374205
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,128,64,0,0.02643040120601654
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,256,1,0,0.15924320220947266
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,256,2,0,0.09028000235557557
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,256,4,0,0.05177599787712097
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,256,8,0,0.03399359881877899
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,256,16,0,0.027239999175071715
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,256,32,0,0.023196800053119658
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,256,64,0,0.022873599827289582
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,256,1,0,0.20038399696350098
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,256,2,0,0.1132464051246643
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,256,4,0,0.06591200232505798
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,256,8,0,0.04351359903812409
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,256,16,0,0.037406399846076965
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,256,32,0,0.033313599228858945
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,256,64,0,0.03127520084381104
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,512,2,0,0.1783776044845581
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,512,1,0,0.33297760486602784
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,512,4,0,0.10111839771270752
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,512,16,0,0.0392655998468399
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,512,8,0,0.059982401132583615
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,512,32,0,0.03525440096855163
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,512,64,0,0.03131999969482422
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,512,1,0,0.3999648094177246
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,512,4,0,0.12346880435943604
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,512,2,0,0.2166368007659912
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,512,8,0,0.07620800137519837
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,512,16,0,0.05159040093421936
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,512,32,0,0.043532800674438474
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,512,64,0,0.03929280042648316
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,1024,1,0,0.7361487865447998
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1024,2,0,0.39261279106140134
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1024,4,0,0.2154848098754883
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1024,8,0,0.12308319807052612
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1024,16,0,0.07623680233955384
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1024,32,0,0.051585602760314944
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1024,64,0,0.04546720087528229
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,1024,1,0,0.8462575912475586
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1024,2,0,0.4511104106903076
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1024,4,0,0.2511039972305298
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1024,8,0,0.14601600170135498
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1024,16,0,0.09159039855003356
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1024,32,0,0.06392480134963989
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1024,64,0,0.055990397930145264
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,1536,1,0,1.2554160118103028
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1536,2,0,0.6494495868682861
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1536,4,0,0.3499376058578491
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1536,8,0,0.2012768030166626
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1536,16,0,0.12161920070648194
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1536,32,0,0.0805679976940155
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1536,64,0,0.05997440218925476
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,1536,1,0,1.3695407867431642
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1536,2,0,0.7227344036102294
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1536,4,0,0.39008479118347167
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1536,8,0,0.22491040229797363
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1536,16,0,0.13919999599456787
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1536,32,0,0.09258239865303039
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1536,64,0,0.0722495973110199
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,2048,1,0,1.8458080291748047
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,2048,4,0,0.5100111961364746
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,2048,2,0,0.9693584442138672
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,2048,8,0,0.28486080169677735
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,2048,16,0,0.1714303970336914
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,2048,32,0,0.11021120548248291
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,2048,64,0,0.07686240077018738
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,2048,1,0,1.9517135620117188
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,2048,2,0,1.029980754852295
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,2048,4,0,0.5538559913635254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,2048,8,0,0.3153712034225464
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,2048,16,0,0.18689119815826416
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,2048,32,0,0.12712960243225097
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,2048,64,0,0.09053279757499695
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,3072,1,0,3.544174575805664
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,3072,2,0,1.687046432495117
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,3072,4,0,0.9084032058715821
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,3072,8,0,0.4922031879425049
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,3072,16,0,0.2893807888031006
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,3072,32,0,0.17774239778518677
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,3072,64,0,0.12342720031738282
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,3072,1,0,3.362102508544922
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,3072,8,0,0.5199376106262207
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,3072,4,0,0.928012752532959
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,3072,2,0,1.7588623046875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,3072,16,0,0.30494720935821534
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,3072,32,0,0.1944383978843689
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,3072,64,0,0.1377392053604126
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,4096,2,0,2.660995292663574
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,4096,1,0,6.606756591796875
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,4096,4,0,1.4024288177490234
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,4096,8,0,0.7427279949188232
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,4096,32,0,0.2630496025085449
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,4096,16,0,0.4236800193786621
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,4096,64,0,0.17831360101699828
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,4096,1,0,5.113907241821289
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,4096,4,0,1.3753328323364258
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,4096,8,0,0.7593440055847168
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,4096,2,0,2.627884864807129
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,4096,16,0,0.4364768028259277
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,4096,64,0,0.19083839654922485
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,4096,32,0,0.2721152067184448
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,6144,8,0,1.3956687927246094
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,6144,4,0,2.8568399429321287
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,6144,2,0,6.576934051513672
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,6144,1,0,14.96083984375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,6144,16,0,0.7698575973510742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,6144,32,0,0.4590303897857666
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,6144,64,0,0.2944047927856445
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,6144,4,0,2.5075040817260743
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,6144,2,0,5.173759841918946
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,6144,8,0,1.3313520431518555
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,6144,1,0,10.890103912353515
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,6144,16,0,0.7587232112884521
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,6144,32,0,0.4567279815673828
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,6144,64,0,0.30181920528411865
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,8192,8,0,2.2943536758422853
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,8192,4,0,5.893527984619141
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,8192,2,0,11.88070068359375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,8192,16,0,1.1964879989624024
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,8192,32,0,0.6946720123291016
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,8192,64,0,0.442412805557251
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,8192,1,0,28.046551513671876
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,8192,8,0,2.0888736724853514
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,8192,4,0,4.005057525634766
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,8192,2,0,9.019779205322266
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,8192,1,0,16.6556884765625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,8192,16,0,1.156886386871338
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,8192,32,0,0.6830111980438233
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,8192,64,0,0.43784480094909667
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,1,1,0,0.08348000049591064
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1,4,0,0.026318401098251343
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1,2,0,0.035416001081466676
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1,8,0,0.022380800545215608
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1,16,0,0.014801600575447082
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1,32,0,0.01650400012731552
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1,64,0,0.014839999377727509
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,1,1,0,0.05980799794197082
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1,2,0,0.0414463996887207
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1,4,0,0.03312639892101288
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1,8,0,0.02720319926738739
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1,16,0,0.02513279914855957
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1,32,0,0.02101600021123886
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1,64,0,0.02348479926586151
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,16,1,0,0.06525440216064453
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,16,2,0,0.039683198928833006
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,16,4,0,0.027136000990867614
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,16,8,0,0.02098720073699951
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,16,16,0,0.016809600591659545
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,16,32,0,0.014945599436759948
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,16,64,0,0.016840000450611115
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,16,1,0,0.0762287974357605
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,16,2,0,0.04969440102577209
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,16,4,0,0.035441601276397706
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,16,8,0,0.029179200530052185
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,16,16,0,0.024990400671958922
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,16,32,0,0.022924800217151643
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,16,64,0,0.022886399924755097
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,32,1,0,0.07841600179672241
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,32,2,0,0.04565280079841614
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,32,4,0,0.03119199872016907
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,32,8,0,0.022987200319766997
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,32,16,0,0.016884799301624297
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,32,32,0,0.016952000558376312
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,32,64,0,0.016782400012016297
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,32,1,0,0.09089279770851136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,32,2,0,0.05749760270118713
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,32,4,0,0.04147520065307617
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,32,8,0,0.030375999212265015
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,32,16,0,0.027214398980140685
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,32,32,0,0.025161600112915038
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,32,64,0,0.02518559992313385
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,64,1,0,0.11104480028152466
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,64,2,0,0.059772801399230954
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,64,4,0,0.037484800815582274
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,64,8,0,0.027134400606155396
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,64,32,0,0.016944000124931337
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,64,16,0,0.020843200385570526
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,64,64,0,0.01682240068912506
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,64,1,0,0.13408639430999755
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,64,2,0,0.07433760166168213
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,64,4,0,0.04952639937400818
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,64,8,0,0.03736799955368042
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,64,16,0,0.029135999083518983
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,64,32,0,0.02706719934940338
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,64,64,0,0.0250463992357254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,128,1,0,0.16666239500045776
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,128,2,0,0.09296159744262696
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,128,4,0,0.053814399242401126
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,128,8,0,0.03324959874153137
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,128,16,0,0.02518239915370941
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,128,32,0,0.021400000154972076
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,128,64,0,0.019707199931144715
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,128,1,0,0.21144158840179444
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,128,2,0,0.11896320581436157
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,128,4,0,0.0699887990951538
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,128,8,0,0.04556959867477417
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,128,16,0,0.03535679876804352
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,128,32,0,0.029318401217460634
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,128,64,0,0.029142400622367857
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,256,1,0,0.29921441078186034
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,256,2,0,0.1614527940750122
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,256,4,0,0.09288640022277832
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,256,8,0,0.05379679799079895
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,256,16,0,0.03529439866542816
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,256,32,0,0.029407998919486998
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,256,64,0,0.027211201190948487
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,256,1,0,0.37632160186767577
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,256,2,0,0.2040208101272583
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,256,4,0,0.11784800291061401
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,256,8,0,0.07034080028533936
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,256,16,0,0.049641600251197814
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,256,32,0,0.03976800143718719
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,256,64,0,0.035385599732398985
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,512,1,0,0.6374911785125732
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,512,2,0,0.3363584041595459
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,512,4,0,0.1824463963508606
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,512,8,0,0.10684479475021362
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,512,16,0,0.06684960126876831
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,512,32,0,0.04445120096206665
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,512,64,0,0.0394351989030838
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,512,1,0,0.7603504180908203
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,512,2,0,0.40566401481628417
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,512,4,0,0.22640480995178222
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,512,8,0,0.13043199777603148
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,512,16,0,0.08244959712028503
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,512,32,0,0.05785920023918152
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,512,64,0,0.0497408002614975
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,1024,1,0,1.4260239601135254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1024,2,0,0.7493792057037354
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1024,4,0,0.4054704189300537
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1024,8,0,0.22905759811401366
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1024,16,0,0.13589760065078735
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1024,32,0,0.08976160287857056
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1024,64,0,0.0642304003238678
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,1024,1,0,1.6475200653076172
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1024,2,0,0.8590736389160156
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1024,4,0,0.4645247936248779
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1024,8,0,0.2645359992980957
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1024,16,0,0.16038559675216674
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1024,32,0,0.10710079669952392
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1024,64,0,0.07837759852409362
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,1536,1,0,2.5487743377685548
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1536,2,0,1.2702976226806642
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1536,4,0,0.6654463768005371
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1536,8,0,0.3682240009307861
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1536,16,0,0.21853439807891845
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1536,32,0,0.13935040235519408
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1536,64,0,0.09775999784469605
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,1536,1,0,2.667188835144043
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1536,2,0,1.3946144104003906
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1536,4,0,0.7488944053649902
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1536,8,0,0.4131951808929443
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1536,16,0,0.24763360023498535
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1536,32,0,0.16118240356445312
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1536,64,0,0.11705759763717652
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,2048,8,0,0.5312096118927002
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,2048,4,0,0.9876640319824219
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,2048,2,0,1.8451919555664062
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,2048,1,0,3.7895278930664062
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,2048,16,0,0.30599040985107423
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,2048,32,0,0.19305919408798217
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,2048,64,0,0.13383040428161622
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,2048,1,0,3.861713409423828
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,2048,4,0,1.059175968170166
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,2048,8,0,0.581831979751587
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,2048,2,0,1.984320068359375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,2048,16,0,0.343560004234314
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,2048,32,0,0.21824960708618163
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,2048,64,0,0.15412960052490235
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,3072,1,0,8.025888061523437
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,3072,4,0,1.7972415924072265
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,3072,8,0,0.9338864326477051
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,3072,2,0,3.5464832305908205
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,3072,16,0,0.5241983890533447
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,3072,32,0,0.322871994972229
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,3072,64,0,0.21607520580291747
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,3072,8,0,0.973414421081543
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,3072,4,0,1.798454475402832
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,3072,1,0,7.00396957397461
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,3072,2,0,3.404763031005859
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,3072,16,0,0.5571216106414795
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,3072,32,0,0.3477024078369141
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,3072,64,0,0.23894720077514647
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,4096,8,0,1.4591664314270019
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,4096,4,0,2.821664047241211
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,4096,1,0,13.875494384765625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,4096,2,0,6.617256164550781
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,4096,32,0,0.47652320861816405
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,4096,16,0,0.8072208404541016
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,4096,64,0,0.3057935953140259
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,4096,4,0,2.6871904373168944
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,4096,8,0,1.4282591819763184
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,4096,2,0,5.171964645385742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,4096,1,0,10.510688018798827
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,4096,16,0,0.8178288459777832
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,4096,32,0,0.4931488037109375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,4096,64,0,0.33148319721221925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,1,1,0,0.11988639831542969
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1,2,0,0.05780320167541504
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1,4,0,0.03329600095748901
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1,8,0,0.025102400779724122
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1,16,0,0.02093919962644577
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1,32,0,0.016956800222396852
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1,64,0,0.014752000570297241
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,1,1,0,0.09866080284118653
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1,2,0,0.05857920050621033
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1,4,0,0.04151839911937714
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1,16,0,0.031057599186897277
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1,32,0,0.023019200563430785
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1,8,0,0.032358399033546446
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1,64,0,0.023073600232601167
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,16,1,0,0.11762239933013915
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,16,2,0,0.06617439985275268
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,16,4,0,0.04063520133495331
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,16,8,0,0.027454400062561037
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,16,16,0,0.02088959962129593
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,16,32,0,0.016894400119781494
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,16,64,0,0.01642879992723465
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,16,1,0,0.12959680557250977
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,16,2,0,0.07619199752807618
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,16,4,0,0.04764319956302643
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,16,8,0,0.035648000240325925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,16,16,0,0.02927680015563965
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,16,32,0,0.02497279942035675
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,16,64,0,0.025088000297546386
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,32,1,0,0.1464192032814026
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,32,2,0,0.08051679730415344
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,32,4,0,0.047603198885917665
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,32,8,0,0.03115200102329254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,32,16,0,0.02290560007095337
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,32,32,0,0.018852800130844116
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,32,64,0,0.016923199594020843
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,32,1,0,0.16679199934005737
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,32,2,0,0.09254720211029052
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,32,8,0,0.04150240123271942
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,32,16,0,0.03121120035648346
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,32,4,0,0.057843202352523805
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,32,32,0,0.027024000883102417
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,32,64,0,0.025158399343490602
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,64,1,0,0.20608799457550048
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,64,4,0,0.06051200032234192
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,64,2,0,0.11321120262145996
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,64,8,0,0.03943679928779602
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,64,16,0,0.027136000990867614
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,64,32,0,0.020924800634384157
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,64,64,0,0.01895360052585602
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,64,1,0,0.24799199104309083
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,64,2,0,0.1363584041595459
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,64,4,0,0.07683039903640747
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,64,8,0,0.051811200380325315
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,64,16,0,0.03914560079574585
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,64,32,0,0.029190400242805482
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,64,64,0,0.027049601078033447
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,128,1,0,0.31703839302062986
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,128,2,0,0.1696079969406128
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,128,4,0,0.0945855975151062
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,128,8,0,0.05615360140800476
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,128,16,0,0.03731200098991394
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,128,32,0,0.029032000899314882
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,128,64,0,0.024872000515460967
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,128,1,0,0.39613919258117675
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,128,2,0,0.21652638912200928
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,128,8,0,0.0720624029636383
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,128,4,0,0.11932959556579589
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,128,16,0,0.04970560073852539
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,128,32,0,0.03739840090274811
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,128,64,0,0.03320319950580597
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,256,1,0,0.5776303768157959
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,256,2,0,0.3077903985977173
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,256,4,0,0.16844960451126098
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,256,8,0,0.09720000028610229
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,256,16,0,0.060008001327514646
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,256,32,0,0.04315040111541748
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,256,64,0,0.0372655987739563
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,256,1,0,0.7139760017395019
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,256,2,0,0.38184640407562254
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,256,4,0,0.21019361019134522
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,256,8,0,0.1252351999282837
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,256,16,0,0.07624639868736267
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,256,32,0,0.05368959903717041
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,256,64,0,0.04768800139427185
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,512,1,0,1.235966396331787
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,512,2,0,0.6425055980682373
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,512,4,0,0.34624319076538085
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,512,8,0,0.19619040489196776
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,512,16,0,0.11726399660110473
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,512,32,0,0.07849119901657105
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,512,64,0,0.05592319965362549
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,512,1,0,1.4834272384643554
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,512,2,0,0.7774159908294678
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,512,4,0,0.41841278076171873
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,512,8,0,0.23881759643554687
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,512,16,0,0.14596960544586182
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,512,32,0,0.09877920150756836
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,512,64,0,0.0720911979675293
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,1024,1,0,2.795518493652344
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1024,2,0,1.4701343536376954
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1024,4,0,0.7777120113372803
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1024,8,0,0.42439999580383303
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1024,16,0,0.24800000190734864
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1024,32,0,0.15941439867019652
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1024,64,0,0.11331679821014404
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,1024,1,0,3.222351837158203
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1024,4,0,0.8933856010437011
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1024,2,0,1.6672128677368163
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1024,8,0,0.49350881576538086
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1024,16,0,0.292084789276123
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1024,32,0,0.19056479930877684
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1024,64,0,0.13578239679336548
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,1536,1,0,4.989652633666992
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1536,8,0,0.7028495788574218
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1536,2,0,2.533076858520508
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1536,4,0,1.3074735641479491
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1536,32,0,0.25028319358825685
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1536,64,0,0.1720576047897339
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1536,16,0,0.4044640064239502
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1536,8,0,0.7898335933685303
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,1536,1,0,5.293495941162109
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1536,4,0,1.4280240058898925
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1536,2,0,2.7124704360961913
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1536,16,0,0.4591248035430908
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1536,32,0,0.290062403678894
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1536,64,0,0.20348799228668213
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,2048,1,0,8.528292846679687
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,2048,4,0,1.9380783081054687
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,2048,2,0,3.8088096618652343
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,2048,8,0,1.0177696228027344
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,2048,16,0,0.5795807838439941
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,2048,32,0,0.3572976112365723
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,2048,64,0,0.2405951976776123
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,2048,1,0,7.624703979492187
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,2048,4,0,2.0581119537353514
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,2048,8,0,1.1175567626953125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,2048,2,0,3.902262496948242
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,2048,16,0,0.6367280006408691
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,2048,64,0,0.2775935888290405
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,2048,32,0,0.4028207778930664
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,1,1,0,0.16882400512695311
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,1,2,0,0.09257599711418152
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,1,4,0,0.05426080226898193
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,1,8,0,0.03320800065994263
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,1,16,0,0.024991999566555022
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,1,32,0,0.02080000042915344
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,1,64,0,0.01679839938879013
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,1,1,0,0.17319999933242797
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,1,2,0,0.09921439886093139
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,1,4,0,0.0598143994808197
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,1,16,0,0.03329919874668121
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,1,32,0,0.02707360088825226
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,1,8,0,0.04161440134048462
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,1,64,0,0.023019200563430785
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,16,1,0,0.22656960487365724
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,16,2,0,0.1179919958114624
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,16,4,0,0.06639360189437866
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,16,8,0,0.04085280001163483
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,16,16,0,0.027198401093482972
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,16,32,0,0.021048000454902648
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,16,64,0,0.018641600012779237
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,16,1,0,0.23960320949554442
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,16,2,0,0.1274783968925476
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,16,4,0,0.07654240131378173
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,16,8,0,0.049630400538444516
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,16,16,0,0.03721440136432648
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,16,32,0,0.029230400919914246
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,16,64,0,0.025012800097465517
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,32,1,0,0.28010239601135256
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,32,2,0,0.1500831961631775
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,32,4,0,0.08045439720153809
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,32,8,0,0.04763360023498535
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,32,16,0,0.033241599798202515
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,32,32,0,0.024942399561405183
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,32,64,0,0.018929600715637207
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,32,1,0,0.31063520908355713
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,32,2,0,0.16633440256118776
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,32,4,0,0.09465439915657044
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,32,8,0,0.057811200618743896
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,32,16,0,0.043367999792099
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,32,32,0,0.033225598931312564
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,32,64,0,0.027241599559783936
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,64,1,0,0.39572319984436033
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,64,2,0,0.20928480625152587
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,64,4,0,0.1151136040687561
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,64,8,0,0.06593760251998901
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,64,16,0,0.042331200838088986
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,64,32,0,0.03129439949989319
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,64,64,0,0.024959999322891235
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,64,2,0,0.2514800071716309
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,64,1,0,0.4698351860046387
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,64,4,0,0.13883520364761354
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,64,8,0,0.08020480275154114
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,64,16,0,0.05586720108985901
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,64,32,0,0.0413536012172699
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,64,64,0,0.03323360085487366
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,128,1,0,0.6095056056976318
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,128,4,0,0.17673759460449218
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,128,2,0,0.3208303928375244
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,128,8,0,0.10083039999008178
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,128,16,0,0.06188160181045532
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,128,64,0,0.033220800757408145
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,128,1,0,0.7603871822357178
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,128,32,0,0.04334560036659241
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,128,2,0,0.40184798240661623
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,128,4,0,0.2229167938232422
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,128,8,0,0.12751519680023193
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,128,16,0,0.08001760244369507
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,128,64,0,0.04542239904403687
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,256,1,0,1.1272591590881347
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,128,32,0,0.05775520205497742
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,256,2,0,0.5918528079986572
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,256,4,0,0.31811840534210206
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,256,8,0,0.17920960187911988
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,256,32,0,0.07209439873695374
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,256,16,0,0.1090880036354065
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,256,64,0,0.05370240211486817
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,256,1,0,1.3959280014038087
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,256,2,0,0.7306863784790039
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,256,4,0,0.39495999813079835
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,256,8,0,0.22371199131011962
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,256,16,0,0.13953280448913574
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,256,32,0,0.09375200271606446
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,256,64,0,0.07014240026473999
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,512,1,0,2.4060592651367188
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,512,2,0,1.259124755859375
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,512,4,0,0.6709440231323243
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,512,8,0,0.3694224119186401
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,512,16,0,0.21798560619354249
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,512,32,0,0.13990399837493897
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,512,64,0,0.10122079849243164
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,512,1,0,2.9063968658447266
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,512,4,0,0.8074272155761719
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,512,2,0,1.5062687873840332
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,512,8,0,0.4499023914337158
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,512,16,0,0.26699678897857665
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,512,32,0,0.17454559803009034
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,512,64,0,0.12730239629745482
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,1024,1,0,5.873183822631836
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,1024,8,0,0.8183456420898437
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,1024,4,0,1.5088815689086914
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,1024,2,0,2.846545600891113
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,1024,16,0,0.4726560115814209
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,1024,32,0,0.29354081153869627
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,1024,64,0,0.20583679676055908
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,1024,1,0,6.395364761352539
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,1024,2,0,3.2926464080810547
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,1024,8,0,0.9492128372192383
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,1024,4,0,1.7266864776611328
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,1024,16,0,0.5498655796051025
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,1024,64,0,0.24858880043029785
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,1,1,0,0.32782878875732424
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,1024,32,0,0.3537744045257568
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,1,2,0,0.17054879665374756
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,1,4,0,0.09271839857101441
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,1,8,0,0.05397760272026062
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,1,16,0,0.033292800188064575
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,1,32,0,0.025054401159286498
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,1,64,0,0.01892479956150055
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,1,1,0,0.3272416114807129
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,1,2,0,0.1730512022972107
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,1,4,0,0.10053759813308716
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,1,8,0,0.05989599823951721
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,1,16,0,0.040417599678039554
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,1,32,0,0.03116160035133362
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,1,64,0,0.02717919945716858
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,16,1,0,0.4297520160675049
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,16,2,0,0.22692320346832276
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,16,4,0,0.11926560401916504
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,16,8,0,0.0684336006641388
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,16,16,0,0.042305600643157956
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,16,32,0,0.031097599864006044
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,16,64,0,0.02290560007095337
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,16,1,0,0.4476655960083008
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,16,2,0,0.23876800537109374
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,16,4,0,0.1321552038192749
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,16,8,0,0.07758399844169617
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,16,16,0,0.051641601324081424
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,16,32,0,0.037415999174118045
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,16,64,0,0.029275199770927428
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,32,1,0,0.5401279926300049
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,32,2,0,0.2860480070114136
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,32,4,0,0.1514575958251953
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,32,8,0,0.08458719849586487
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,32,16,0,0.050151997804641725
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,32,32,0,0.03731360137462616
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,32,64,0,0.02709920108318329
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,32,1,0,0.588804817199707
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,32,2,0,0.31185760498046877
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,32,4,0,0.16913599967956544
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,32,8,0,0.09839199781417847
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,32,16,0,0.06273120045661926
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,32,32,0,0.04559360146522522
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,32,64,0,0.03734880089759827
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,64,1,0,0.7664271831512451
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,64,2,0,0.40295681953430174
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,64,4,0,0.2137295961380005
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,64,8,0,0.12030559778213501
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,64,16,0,0.07004640102386475
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,64,32,0,0.04772480130195618
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,64,64,0,0.037273600697517395
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,64,1,0,0.9043951988220215
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,64,2,0,0.4732367992401123
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,64,4,0,0.25895678997039795
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,64,8,0,0.14543839693069457
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,64,16,0,0.08872479796409607
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,64,32,0,0.06166399717330932
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,64,64,0,0.04766879975795746
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,128,1,0,1.189020824432373
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,128,2,0,0.6259007930755616
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,128,4,0,0.33327679634094237
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,128,8,0,0.18715200424194336
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,128,16,0,0.11166239976882934
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,128,32,0,0.07432479858398437
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,128,64,0,0.055743998289108275
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,128,1,0,1.484889602661133
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,128,2,0,0.7722640037536621
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,128,4,0,0.41820478439331055
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,128,8,0,0.23654398918151856
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,128,16,0,0.14206240177154542
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,128,32,0,0.09786720275878906
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,128,64,0,0.07166079878807068
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,256,1,0,2.2002256393432615
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,256,2,0,1.1526240348815917
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,256,4,0,0.608681583404541
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,256,8,0,0.34322080612182615
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,256,16,0,0.20145759582519532
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,256,32,0,0.13229600191116334
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,256,64,0,0.09466879963874816
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,256,1,0,2.7464256286621094
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,256,2,0,1.421012783050537
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,256,4,0,0.7577568054199219
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,256,8,0,0.42326722145080564
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,256,16,0,0.2529968023300171
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,256,32,0,0.16732319593429565
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,256,64,0,0.12191359996795655
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,512,1,0,4.733111953735351
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,512,2,0,2.469432067871094
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,512,8,0,0.7163551807403564
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,512,4,0,1.2957679748535156
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,512,16,0,0.4124767780303955
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,512,64,0,0.18668639659881592
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,512,32,0,0.26406400203704833
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,512,1,0,5.772001647949219
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,512,2,0,2.9655439376831056
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,512,8,0,0.8633855819702149
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,512,4,0,1.5714271545410157
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,512,16,0,0.5038479804992676
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,512,32,0,0.3239295959472656
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,512,64,0,0.2325808048248291
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,1,1,0,0.6781519889831543
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,1,2,0,0.33269600868225097
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,1,4,0,0.18194880485534667
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,1,8,0,0.09481599926948547
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,1,16,0,0.055718398094177245
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,1,32,0,0.03329600095748901
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,1,64,0,0.025116801261901855
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,1,1,0,0.6304495811462403
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,1,2,0,0.3278543949127197
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,1,4,0,0.17470879554748536
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,1,8,0,0.10007679462432861
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,1,16,0,0.06190239787101746
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,1,32,0,0.04141440093517303
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,1,64,0,0.033139199018478394
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,16,1,0,0.8390687942504883
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,16,2,0,0.43617758750915525
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,16,8,0,0.12199840545654297
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,16,4,0,0.22936320304870605
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,16,16,0,0.07091519832611085
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,16,32,0,0.044828799366950986
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,16,64,0,0.031196799874305726
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,16,1,0,0.8587840080261231
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,16,2,0,0.4497519969940186
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,16,4,0,0.2443824052810669
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,16,8,0,0.1327903985977173
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,16,32,0,0.05627359747886658
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,16,16,0,0.07996159791946411
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,16,64,0,0.04139359891414642
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,32,1,0,1.0490655899047852
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,32,4,0,0.29054720401763917
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,32,8,0,0.15904159545898439
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,32,2,0,0.5529248237609863
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,32,16,0,0.09048640131950378
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,32,32,0,0.05685120224952698
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,32,64,0,0.04346239864826203
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,32,1,0,1.1399423599243164
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,32,8,0,0.17683839797973633
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,32,2,0,0.590550422668457
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,32,4,0,0.31785120964050295
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,32,16,0,0.10636639595031738
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,32,64,0,0.05364480018615723
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,64,1,0,1.4875264167785645
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,32,32,0,0.06985440254211425
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,64,2,0,0.779695987701416
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,64,4,0,0.40955681800842286
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,64,8,0,0.22497119903564453
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,64,16,0,0.13364160060882568
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,64,32,0,0.08259360194206238
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,64,64,0,0.05984640121459961
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,64,1,0,1.7642351150512696
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,64,2,0,0.9165887832641602
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,64,4,0,0.49059200286865234
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,64,8,0,0.27245919704437255
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,64,16,0,0.16016800403594972
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,64,32,0,0.10495519638061523
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,64,64,0,0.07627840042114258
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,128,1,0,2.33644962310791
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,128,2,0,1.2147071838378907
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,128,8,0,0.3564512014389038
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,128,4,0,0.6473696231842041
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,128,16,0,0.20863840579986573
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,128,32,0,0.13363840579986572
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,128,64,0,0.09690560102462768
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,128,1,0,2.9297119140625
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,128,2,0,1.5079936027526855
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,128,8,0,0.44583840370178224
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,128,4,0,0.7997168064117431
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,128,16,0,0.2665663957595825
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,128,32,0,0.17044479846954347
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,128,64,0,0.12563199996948243
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,256,1,0,4.337776184082031
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,256,8,0,0.6586143970489502
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,256,4,0,1.1855520248413085
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,256,2,0,2.2623695373535155
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,256,16,0,0.3857439994812012
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,256,32,0,0.24674880504608154
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,256,64,0,0.17838079929351808
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,256,1,0,5.45089111328125
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,256,2,0,2.7996944427490233
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,256,8,0,0.812622356414795
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,256,4,0,1.4793935775756837
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,256,16,0,0.48396477699279783
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,256,32,0,0.31158719062805174
SGLang,0.5.9,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,256,64,0,0.22388639450073242
