framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,1,0.012507200241088867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,1,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,1,0.011857599765062333
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,1,0.010681600123643876
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,1,0.010558400303125381
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,1,0.010569600015878677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,1,0.010580799728631973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,1,0.010731200128793717
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,1,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,1,0.010599999874830245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,1,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,1,0.010593599826097488
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,1,0.010576000064611435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,1,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,3,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,3,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,3,0.01061599999666214
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,3,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,3,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,3,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,3,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,3,0.010635200142860412
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,3,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,3,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,3,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,3,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,3,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,7,0.012379200011491776
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,7,0.012492799758911132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,7,0.010847999900579452
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,7,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,7,0.01051200032234192
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,7,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,7,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,7,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,7,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,7,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,7,0.010527999699115753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,7,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,7,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,15,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,15,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,15,0.010569600015878677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,15,0.010564800351858139
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,15,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,15,0.010681600123643876
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,15,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,15,0.0106175996363163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,15,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,15,0.010582400113344192
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,15,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,15,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,3,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,15,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,31,0.012454400211572647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,31,0.01236959993839264
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,31,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,31,0.010572800040245056
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,31,0.010545600205659866
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,31,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,31,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,31,0.010556799918413162
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,31,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,31,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,31,0.010673599690198899
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,31,0.010294400155544281
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,31,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,31,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,63,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,63,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,63,0.01231520026922226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,63,0.010633599758148194
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,63,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,63,0.010385599732398988
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,63,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,63,0.010548800230026245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,63,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,63,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,63,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,63,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,63,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,63,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,7,0.010579200088977813
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,127,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,127,0.012430399656295776
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,127,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,127,0.010611200332641601
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,127,0.010367999970912933
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,127,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,127,0.010574399679899215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,127,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,127,0.010582400113344192
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,15,0.010577599704265594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,127,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,127,0.010315199941396713
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,127,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,127,0.010547199845314026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,127,0.01064160019159317
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,255,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,255,0.01241919994354248
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,255,0.010545600205659866
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,255,0.010558400303125381
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,255,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,255,0.010539200156927109
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,255,0.010609599947929382
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,255,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,255,0.010385599732398988
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,255,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,255,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,255,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,255,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,255,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,511,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,511,0.012641599774360657
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,511,0.013952000439167023
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,511,0.0126351997256279
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,511,0.012681600451469422
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,511,0.0126351997256279
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,511,0.01252480000257492
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,511,0.012569600343704223
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,511,0.01250240057706833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,511,0.01244800016283989
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,511,0.012516799569129943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,511,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,511,0.010606399923563003
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,511,0.012534399330615998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,1023,0.014659200608730317
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,1023,0.015761600434780122
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,1023,0.016433599591255187
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,1023,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,1023,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,1023,0.014407999813556671
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,1023,0.01446399986743927
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,1023,0.014892800152301789
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,1023,0.014606399834156037
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,1023,0.014444799721240997
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,1023,0.01260959953069687
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,1023,0.012787200510501862
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,1023,0.01443679928779602
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,1023,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,2047,0.01664319932460785
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,2047,0.016700799763202667
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,2047,0.014856000244617463
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,2047,0.014558400213718414
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,2047,0.014662399888038635
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,2047,0.014670400321483612
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,2047,0.014747199416160584
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,2047,0.014620800316333771
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,2047,0.014620800316333771
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,2047,0.01462559998035431
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,2047,0.013726399838924408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,2047,0.01419840008020401
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,2047,0.012601600587368011
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,2047,0.012585599720478059
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,4095,0.018585599958896637
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,4095,0.014684799313545226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,4095,0.014688000082969666
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,4095,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,4095,0.014635199308395385
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,4095,0.014694400131702423
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,4095,0.01690080016851425
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,4095,0.01650079935789108
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,4095,0.014627200365066529
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,4095,0.014641599357128143
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,4095,0.014628799259662628
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,4095,0.014569599926471711
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,4095,0.01448799967765808
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,4095,0.014577600359916686
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,8191,0.018775999546051025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,8191,0.018563200533390046
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,8191,0.024377599358558655
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,8191,0.016579200327396394
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,8191,0.016676799952983858
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,8191,0.0166143998503685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,8191,0.01669600009918213
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,8191,0.01879200041294098
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,8191,0.0166703999042511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,8191,0.016577599942684172
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,8191,0.014633600413799287
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,8191,0.014908799529075622
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,8191,0.016492800414562227
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,8191,0.014638400077819825
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,16383,0.02080480009317398
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,16383,0.026927998661994933
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,16383,0.019852800667285918
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,16383,0.02065120041370392
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,16383,0.02067680060863495
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,16383,0.02889919877052307
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,16383,0.020571200549602507
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,16383,0.02085919976234436
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,16383,0.02091359943151474
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,16383,0.018699200451374055
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,16383,0.018585599958896637
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,16383,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,16383,0.018806399405002595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,16383,0.018668800592422485
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,32767,0.03730559945106506
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,32767,0.03097119927406311
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,32767,0.02744640111923218
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,32767,0.027006399631500245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,32767,0.029070401191711427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,32767,0.026980799436569215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,32767,0.027635198831558228
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,32767,0.02691679894924164
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,32767,0.024879999458789825
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,32767,0.02451840043067932
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,32767,0.02295839935541153
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,32767,0.022804799675941467
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,32767,0.022753599286079406
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,32767,0.022947199642658234
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,65535,0.039612799882888794
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,65535,0.043326398730278014
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,65535,0.03515680134296417
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,65535,0.03522399961948395
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,65535,0.03535839915275574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,65535,0.03399679958820343
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,65535,0.03505280017852783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,65535,0.029102399945259094
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,65535,0.028940799832344054
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,65535,0.03866879940032959
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,65535,0.02884959876537323
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,65535,0.028907200694084166
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,65535,0.026924800872802735
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,65535,0.02877599895000458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,131071,0.061831998825073245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,131071,0.054425597190856934
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,131071,0.05321120023727417
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,131071,0.05602080225944519
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,131071,0.0554144024848938
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,131071,0.07243520021438599
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,131071,0.0557856023311615
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,131071,0.042444801330566405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,131071,0.03918719887733459
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,131071,0.034683200716972354
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,131071,0.03314880132675171
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,131071,0.0331824004650116
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,131071,0.033257600665092465
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,131071,0.03312320113182068
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,1,0.0124208003282547
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,1,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,1,0.012574400007724761
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,1,0.01056319996714592
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,1,0.010580799728631973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,1,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,1,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,1,0.010676799714565277
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,1,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,1,0.01053600013256073
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,1,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,1,0.010596799850463866
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,1,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,1,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,3,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,3,0.011081600189208984
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,3,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,3,0.010652799904346467
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,3,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,3,0.010608000308275222
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,3,0.010577599704265594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,3,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,3,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,3,0.010574399679899215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,3,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,3,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,3,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,3,0.010654400289058685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,7,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,7,0.012542399764060973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,7,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,7,0.010576000064611435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,7,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,7,0.010604800283908844
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,7,0.010553599894046783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,7,0.010603199899196624
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,7,0.010627199709415436
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,7,0.010569600015878677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,7,0.010334400087594986
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,7,0.01055999994277954
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,7,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,7,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,15,0.01242400035262108
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,15,0.012555199861526489
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,15,0.010527999699115753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,15,0.010623999685049058
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,15,0.01069760024547577
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,15,0.010758399963378906
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,15,0.01058719977736473
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,15,0.01056160032749176
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,15,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,15,0.010807999968528747
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,15,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,15,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,31,0.012403199821710587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,15,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,15,0.010659199953079224
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,31,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,31,0.012492799758911132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,31,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,31,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,31,0.010583999752998351
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,31,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,31,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,31,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,31,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,31,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,31,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,31,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,31,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,63,0.012372799962759019
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,63,0.012038400024175644
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,63,0.012452799826860428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,63,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,63,0.010603199899196624
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,63,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,63,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,63,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,63,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,63,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,63,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,63,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,63,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,63,0.010543999820947647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,127,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,127,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,127,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,127,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,127,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,127,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,127,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,127,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,127,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,127,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,127,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,127,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,127,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,127,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,255,0.012859199941158295
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,255,0.01241919994354248
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,255,0.011964800208806992
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,255,0.01053600013256073
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,255,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,255,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,255,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,255,0.010552000254392624
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,255,0.010520000010728836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,255,0.010545600205659866
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,255,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,255,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,255,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,255,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,511,0.014481599628925323
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,511,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,511,0.012848000228404998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,511,0.012548799812793731
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,511,0.012598399817943574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,511,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,511,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,511,0.012382400035858155
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,511,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,511,0.012443199753761292
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,511,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,511,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,511,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,511,0.010726399719715118
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,1023,0.014550399780273438
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,1023,0.014628799259662628
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,1023,0.014433600008487701
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,1023,0.014575999975204468
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,1023,0.014545600116252898
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,1023,0.014606399834156037
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,1023,0.012588800489902496
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,1023,0.013123199343681335
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,1023,0.012649600207805634
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,1023,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,1023,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,1023,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,1023,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,1023,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,2047,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,2047,0.014593599736690522
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,2047,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,2047,0.012643200159072877
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,2047,0.013091200590133667
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,2047,0.012601600587368011
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,2047,0.012572799623012543
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,2047,0.014494399726390838
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,2047,0.012532800436019897
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,2047,0.01250240057706833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,2047,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,2047,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,2047,0.01252480000257492
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,4095,0.016680000722408293
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,2047,0.012612800300121307
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,4095,0.01857440024614334
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,4095,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,4095,0.014548799395561219
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,4095,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,4095,0.013307200372219085
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,4095,0.013291199505329133
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,4095,0.012628799676895142
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,4095,0.01647839993238449
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,4095,0.016396799683570863
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,4095,0.012622399628162384
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,4095,0.012567999958992004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,4095,0.012508800625801087
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,4095,0.012624000012874604
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,8191,0.020873600244522096
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,8191,0.020652799308300017
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,8191,0.018508799374103546
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,8191,0.018881599605083465
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,8191,0.01855680048465729
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,8191,0.016726399958133697
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,8191,0.017822399735450745
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,8191,0.016715200245380403
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,8191,0.018587200343608855
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,8191,0.016715200245380403
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,8191,0.01661760061979294
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,8191,0.016519999504089354
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,8191,0.01656640022993088
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,16383,0.032918399572372435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,8191,0.016620799899101257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,16383,0.024702399969100952
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,16383,0.02255840003490448
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,16383,0.020851199328899384
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,16383,0.020715199410915375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,16383,0.020632000267505647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,16383,0.020636799931526183
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,16383,0.024820800125598907
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,16383,0.02078240066766739
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,16383,0.019068799912929535
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,16383,0.018651199340820313
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,16383,0.01852640062570572
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,16383,0.018611200153827667
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,16383,0.018486399948596955
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,32767,0.03699040114879608
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,32767,0.037062400579452516
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,32767,0.028896000981330872
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,32767,0.026876801252365114
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,32767,0.02688319981098175
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,32767,0.026814401149749756
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,32767,0.026315200328826904
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,32767,0.026633599400520326
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,32767,0.02476159930229187
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,32767,0.0228752002120018
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,32767,0.03581759929656982
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,32767,0.022843199968338012
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,32767,0.022910399734973906
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,32767,0.02282399982213974
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,65535,0.05861759781837463
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,65535,0.05628160238265991
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,65535,0.05247200131416321
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,65535,0.05339999794960022
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,65535,0.0549776017665863
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,65535,0.05287039875984192
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,65535,0.05493919849395752
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,65535,0.0388047993183136
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,65535,0.03368160128593445
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,65535,0.031004801392555237
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,65535,0.03094879984855652
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,65535,0.03091199994087219
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,65535,0.0295199990272522
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,65535,0.030827200412750243
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,131071,0.08369759917259216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,131071,0.09316800236701965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,131071,0.07675840258598328
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,131071,0.07604479789733887
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,131071,0.07817919850349427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,131071,0.07922880053520202
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,131071,0.0773967981338501
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,131071,0.055366402864456175
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,131071,0.05318560004234314
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,131071,0.04719040095806122
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,131071,0.046649599075317384
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,131071,0.04803040027618408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,1,0.012432000041007996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,131071,0.047153601050376893
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,131071,0.04812000095844269
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,1,0.01236959993839264
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,1,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,1,0.011558400094509124
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,1,0.010547199845314026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,1,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,1,0.010371199995279311
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,1,0.010569600015878677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,1,0.010619200021028518
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,1,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,1,0.010548800230026245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,1,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,1,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,1,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,3,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,3,0.012383999675512314
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,3,0.012383999675512314
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,3,0.010527999699115753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,3,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,3,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,3,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,3,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,3,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,3,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,3,0.010619200021028518
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,3,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,3,0.01051200032234192
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,3,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,7,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,7,0.01284639984369278
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,7,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,7,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,7,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,7,0.010547199845314026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,7,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,7,0.010564800351858139
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,7,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,7,0.010545600205659866
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,7,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,7,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,7,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,7,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,15,0.012540799379348756
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,15,0.012441600114107132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,15,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,15,0.010635200142860412
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,15,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,15,0.010558400303125381
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,15,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,15,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,15,0.010639999806880952
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,15,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,15,0.010966400057077408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,15,0.010283199697732925
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,15,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,15,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,31,0.012401600182056428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,31,0.01188960000872612
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,31,0.012417600303888322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,31,0.012399999797344208
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,31,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,31,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,31,0.01056160032749176
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,31,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,31,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,31,0.010572800040245056
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,31,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,31,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,31,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,31,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,63,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,63,0.012430399656295776
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,63,0.012414400279521943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,63,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,63,0.010582400113344192
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,63,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,63,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,63,0.01064160019159317
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,63,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,63,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,63,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,63,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,63,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,63,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,127,0.012449599802494049
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,127,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,127,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,127,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,127,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,127,0.010583999752998351
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,127,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,127,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,127,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,127,0.010548800230026245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,127,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,127,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,127,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,127,0.010558400303125381
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,255,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,255,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,255,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,255,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,255,0.012443199753761292
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,255,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,255,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,255,0.011073599755764007
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,255,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,255,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,255,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,255,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,255,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,511,0.014478400349617004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,255,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,511,0.014574399590492249
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,511,0.013470399379730224
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,511,0.012715199589729309
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,511,0.012691199779510498
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,511,0.012513600289821625
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,511,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,511,0.01250240057706833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,511,0.012449599802494049
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,511,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,511,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,511,0.01244639977812767
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,511,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,511,0.012439999729394913
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,1023,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,1023,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,1023,0.014723199605941772
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,1023,0.014553600549697876
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,1023,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,1023,0.012998400628566742
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,1023,0.012611199915409089
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,1023,0.014468799531459808
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,1023,0.012547199428081513
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,1023,0.012745599448680877
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,1023,0.012508800625801087
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,1023,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,1023,0.012615999579429627
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,1023,0.01247519999742508
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,2047,0.01789119988679886
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,2047,0.016771200299263
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,2047,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,2047,0.014548799395561219
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,2047,0.013660800457000733
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,2047,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,2047,0.014476799964904785
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,2047,0.014539200067520141
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,2047,0.014679999649524688
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,2047,0.012515200674533844
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,2047,0.012654399871826172
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,2047,0.012547199428081513
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,2047,0.012507200241088867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,2047,0.012583999335765839
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,4095,0.020694400370121
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,4095,0.01868479996919632
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,4095,0.017892800271511078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,4095,0.016649599373340606
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,4095,0.016599999368190767
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,4095,0.01653279960155487
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,4095,0.01658399999141693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,4095,0.016732800006866454
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,4095,0.016622400283813475
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,4095,0.01658080071210861
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,4095,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,4095,0.014689600467681885
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,4095,0.014560000598430633
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,4095,0.014654399454593658
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,8191,0.021140800416469575
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,8191,0.030875200033187868
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,8191,0.020641599595546723
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,8191,0.018606400489807128
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,8191,0.018731200695037843
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,8191,0.01855199933052063
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,8191,0.01860959976911545
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,8191,0.01858399957418442
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,8191,0.02271360009908676
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,8191,0.016711999475955964
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,8191,0.01671839952468872
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,8191,0.01661120057106018
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,8191,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,8191,0.016638399660587312
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,16383,0.030964800715446474
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,16383,0.03481920063495636
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,16383,0.024745599925518037
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,16383,0.022767999768257143
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,16383,0.022881600260734557
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,16383,0.02274399995803833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,16383,0.022443200647830962
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,16383,0.02903839945793152
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,16383,0.02466239929199219
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,16383,0.020683200657367708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,16383,0.02054080069065094
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,16383,0.018785600364208222
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,16383,0.018676799535751343
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,16383,0.018772800266742707
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,32767,0.05325279831886291
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,32767,0.05367680191993714
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,32767,0.04835520088672638
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,32767,0.04766559898853302
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,32767,0.04934560060501099
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,32767,0.04895519912242889
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,32767,0.04792959988117218
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,32767,0.03476159870624542
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,32767,0.03172000050544739
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,32767,0.02677600085735321
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,32767,0.026811200380325317
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,32767,0.02675360143184662
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,32767,0.024817599356174468
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,32767,0.02480800002813339
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,65535,0.0796832025051117
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,65535,0.0709775984287262
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,65535,0.09027519822120667
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,65535,0.07233920097351074
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,65535,0.07156959772109986
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,65535,0.07351679801940918
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,65535,0.07160800099372863
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,65535,0.04943679869174957
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,65535,0.050755202770233154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,65535,0.041391998529434204
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,65535,0.04233759939670563
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,65535,0.04325760006904602
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,65535,0.04123679995536804
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,65535,0.042403200268745424
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,131071,0.13295520544052125
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,131071,0.1577952027320862
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,131071,0.12088799476623535
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,131071,0.12024480104446411
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,131071,0.12143839597702026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,131071,0.12052320241928101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,131071,0.12137600183486938
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,131071,0.0637776017189026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,131071,0.0825872004032135
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,131071,0.07533919811248779
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,131071,0.062352001667022705
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,131071,0.06429920196533204
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,131071,0.06358720064163208
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,1,0.012566399574279786
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,131071,0.06313120126724243
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,1,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,1,0.01268800050020218
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,1,0.011363200098276138
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,1,0.011963199824094772
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,1,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,1,0.010769599676132202
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,1,0.012563200294971466
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,1,0.010636799782514573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,1,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,1,0.010700800269842149
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,1,0.01053439974784851
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,1,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,1,0.010716799646615982
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,3,0.012601600587368011
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,3,0.012439999729394913
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,3,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,3,0.0125231996178627
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,3,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,3,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,3,0.012724800407886505
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,3,0.012518399953842163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,3,0.011275199800729751
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,3,0.010547199845314026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,3,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,3,0.010580799728631973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,3,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,3,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,7,0.012620800733566284
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,7,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,7,0.01234079971909523
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,7,0.012256000190973282
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,7,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,7,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,7,0.010520000010728836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,7,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,7,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,7,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,7,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,7,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,7,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,7,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,15,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,15,0.012425599992275238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,15,0.011776000261306763
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,15,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,15,0.012454400211572647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,15,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,15,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,15,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,15,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,15,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,15,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,15,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,15,0.010395199805498124
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,31,0.012478400021791458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,31,0.012539200484752655
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,15,0.010569600015878677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,31,0.012438400089740754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,31,0.01178240031003952
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,31,0.01064160019159317
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,31,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,31,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,31,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,31,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,31,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,31,0.010585600137710571
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,31,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,31,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,31,0.010520000010728836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,63,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,63,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,63,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,63,0.01175839975476265
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,63,0.012169600278139115
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,63,0.010543999820947647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,63,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,63,0.012428800016641617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,63,0.010590399801731109
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,63,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,63,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,63,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,63,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,63,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,127,0.012593600153923034
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,127,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,127,0.01170239970088005
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,127,0.01231520026922226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,127,0.010608000308275222
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,127,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,127,0.010606399923563003
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,127,0.011017599701881408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,127,0.012478400021791458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,127,0.010598400235176086
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,127,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,127,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,127,0.010620799660682679
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,127,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,255,0.012630400061607362
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,255,0.012537600100040435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,255,0.012377600371837615
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,255,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,255,0.010601600259542465
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,255,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,255,0.010579200088977813
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,255,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,255,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,255,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,255,0.010539200156927109
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,255,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,255,0.010547199845314026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,255,0.010846400260925293
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,511,0.014500799775123595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,511,0.014427199959754944
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,511,0.012537600100040435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,511,0.014636799693107605
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,511,0.01271360069513321
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,511,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,511,0.012630400061607362
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,511,0.01279360055923462
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,511,0.012510399520397186
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,511,0.012567999958992004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,511,0.01252799928188324
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,511,0.012583999335765839
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,511,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,511,0.012678399682044983
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,1023,0.016780799627304076
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,1023,0.016599999368190767
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,1023,0.014838400483131408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,1023,0.014734399318695069
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,1023,0.01451359987258911
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,1023,0.014368000626564025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,1023,0.014739200472831726
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,1023,0.014585599303245544
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,1023,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,1023,0.014585599303245544
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,1023,0.01250240057706833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,1023,0.012617599964141846
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,1023,0.012828800082206725
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,1023,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,2047,0.020904000103473663
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,2047,0.016612799465656282
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,2047,0.018804800510406495
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,2047,0.0166143998503685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,2047,0.016628800332546233
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,2047,0.016475200653076172
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,2047,0.016715200245380403
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,2047,0.016582399606704712
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,2047,0.014664000272750855
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,2047,0.014747199416160584
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,2047,0.012627199292182922
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,2047,0.014267200231552124
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,2047,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,4095,0.028958401083946227
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,2047,0.012516799569129943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,4095,0.02078240066766739
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,4095,0.01881600022315979
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,4095,0.018518400192260743
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,4095,0.017871999740600587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,4095,0.018676799535751343
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,4095,0.01740639954805374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,4095,0.020764799416065217
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,4095,0.0165120005607605
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,4095,0.016607999801635742
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,4095,0.01648160070180893
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,4095,0.014601600170135499
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,4095,0.014612799882888794
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,8191,0.029542401432991028
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,8191,0.022703999280929567
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,8191,0.03176800012588501
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,4095,0.014574399590492249
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,8191,0.02074880003929138
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,8191,0.020688000321388244
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,8191,0.02263679951429367
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,8191,0.020576000213623047
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,8191,0.02479040026664734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,8191,0.022700800001621245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,8191,0.01855839937925339
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,8191,0.01857440024614334
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,8191,0.017500799894332886
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,8191,0.016803200542926788
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,8191,0.016652800142765045
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,16383,0.049486398696899414
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,16383,0.05748159885406494
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,16383,0.04723680019378662
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,16383,0.04534400105476379
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,16383,0.04548639953136444
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,16383,0.045259198546409606
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,16383,0.04461759924888611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,16383,0.02892000079154968
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,16383,0.031913599371910094
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,16383,0.02476000040769577
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,16383,0.02476799935102463
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,16383,0.02479359954595566
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,16383,0.022843199968338012
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,16383,0.02289759963750839
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,32767,0.07586719989776611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,32767,0.0933184027671814
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,32767,0.06888480186462402
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,32767,0.06851360201835632
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,32767,0.0689631998538971
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,32767,0.06792160272598266
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,32767,0.046291199326515195
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,32767,0.06874719858169556
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,32767,0.04049760103225708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,32767,0.051928001642227176
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,32767,0.039273598790168764
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,32767,0.03902879953384399
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,32767,0.03926079869270325
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,32767,0.039024001359939574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,65535,0.12582720518112184
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,65535,0.11955679655075073
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,65535,0.162716805934906
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,65535,0.11870399713516236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,65535,0.11962399482727051
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,65535,0.11981760263442993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,65535,0.11762720346450806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,65535,0.07032960057258605
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,65535,0.08314719796180725
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,65535,0.061459201574325564
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,65535,0.06067039966583252
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,65535,0.06015679836273193
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,65535,0.05983039736747742
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,65535,0.05965759754180908
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,131071,0.23240160942077637
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,131071,0.2176975965499878
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,131071,0.3005392074584961
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,131071,0.21684639453887938
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,131071,0.21776959896087647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,131071,0.21758720874786378
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,131071,0.11862560510635375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,131071,0.21486399173736573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,131071,0.14743679761886597
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,131071,0.10542880296707154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,131071,0.10067520141601563
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,131071,0.10412000417709351
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,1,0.012654399871826172
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,131071,0.10484479665756226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,131071,0.10053440332412719
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,1,0.012559999525547028
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,1,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,1,0.012507200241088867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,1,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,1,0.011033599823713302
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,1,0.010644800215959548
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,1,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,1,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,1,0.012417600303888322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,1,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,1,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,1,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,1,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,3,0.0141744002699852
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,3,0.01255359947681427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,3,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,3,0.012414400279521943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,3,0.012460800260305405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,3,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,3,0.011710400134325028
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,3,0.012428800016641617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,3,0.012535999715328216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,3,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,3,0.010728000104427338
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,3,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,3,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,3,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,7,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,7,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,7,0.013246400654315949
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,7,0.012280000001192093
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,7,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,7,0.01077279970049858
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,7,0.012435200065374375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,7,0.012510399520397186
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,7,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,7,0.012494400143623352
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,7,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,7,0.010828799754381179
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,7,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,7,0.010576000064611435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,15,0.012571200728416443
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,15,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,15,0.012777599692344665
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,15,0.012415999919176102
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,15,0.012454400211572647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,15,0.011084800213575363
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,15,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,15,0.012595200538635254
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,15,0.012438400089740754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,15,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,15,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,15,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,15,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,15,0.010558400303125381
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,31,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,31,0.012638400495052337
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,31,0.01074879989027977
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,31,0.01239359974861145
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,31,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,31,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,31,0.010608000308275222
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,31,0.012567999958992004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,31,0.012107200175523757
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,31,0.010539200156927109
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,31,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,31,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,31,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,31,0.010577599704265594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,63,0.01310880035161972
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,63,0.012591999769210816
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,63,0.012478400021791458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,63,0.012428800016641617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,63,0.01236959993839264
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,63,0.012432000041007996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,63,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,63,0.012435200065374375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,63,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,63,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,63,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,63,0.010580799728631973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,63,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,63,0.01053439974784851
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,127,0.013363200426101684
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,127,0.01250240057706833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,127,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,127,0.012417600303888322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,127,0.011686400324106217
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,127,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,127,0.01247519999742508
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,127,0.012507200241088867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,127,0.01239520013332367
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,127,0.011510399729013443
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,127,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,127,0.010548800230026245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,127,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,255,0.013235199451446533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,127,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,255,0.012593600153923034
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,255,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,255,0.012392000108957291
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,255,0.012411200255155564
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,255,0.0112527996301651
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,255,0.012404800206422806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,255,0.012427199631929398
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,255,0.01247360035777092
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,255,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,255,0.010539200156927109
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,255,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,255,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,255,0.010569600015878677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,511,0.016531200706958772
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,511,0.014561599493026734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,511,0.014459200203418732
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,511,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,511,0.014433600008487701
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,511,0.014563199877738953
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,511,0.013327999413013459
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,511,0.012569600343704223
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,511,0.012508800625801087
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,511,0.014484800398349762
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,511,0.012454400211572647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,511,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,511,0.01241919994354248
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,511,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,1023,0.020603199303150178
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,1023,0.016809600591659545
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,1023,0.01666879951953888
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,1023,0.01648160070180893
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,1023,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,1023,0.014655999839305878
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,1023,0.014659200608730317
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,1023,0.016763199865818024
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,1023,0.014713600277900696
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,1023,0.014430400729179383
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,1023,0.012612800300121307
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,1023,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,1023,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,1023,0.012697599828243256
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,2047,0.026844799518585205
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,2047,0.020721599459648132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,2047,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,2047,0.01706400066614151
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,2047,0.01855839937925339
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,2047,0.018566399812698364
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,2047,0.01659200042486191
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,2047,0.02062239944934845
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,2047,0.0166703999042511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,2047,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,2047,0.0144896000623703
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,2047,0.01459999978542328
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,2047,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,2047,0.014556799829006196
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,4095,0.02879199981689453
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,4095,0.030731201171875
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,4095,0.02081120014190674
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,4095,0.02080000042915344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,4095,0.020652799308300017
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,4095,0.020160000026226043
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,4095,0.018775999546051025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,4095,0.022729599475860597
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,4095,0.020771199464797975
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,4095,0.01661919951438904
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,4095,0.016646400094032288
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,4095,0.01663520038127899
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,4095,0.016648000478744505
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,4095,0.01658080071210861
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,8191,0.050046402215957644
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,8191,0.05409600138664246
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,8191,0.04526239931583405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,8191,0.04467679858207703
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,8191,0.04477599859237671
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,8191,0.04362080097198486
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,8191,0.04323840141296387
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,8191,0.028248000144958495
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,8191,0.028915199637413024
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,8191,0.022592000663280487
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,8191,0.02093279957771301
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,8191,0.020707200467586517
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,8191,0.020771199464797975
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,8191,0.020803199708461763
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,16383,0.07491199970245362
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,16383,0.09218080043792724
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,16383,0.06960800290107727
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,16383,0.07006880044937133
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,16383,0.06865599751472473
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,16383,0.06916159987449647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,16383,0.06746399998664857
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,16383,0.046060800552368164
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,16383,0.050241601467132566
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,16383,0.0395823985338211
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,16383,0.0384799987077713
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,16383,0.03922559916973114
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,16383,0.03801920115947723
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,16383,0.03735199868679047
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,32767,0.12980159521102905
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,32767,0.12106720209121705
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,32767,0.1692639946937561
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,32767,0.12013920545578002
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,32767,0.12270079851150513
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,32767,0.11819519996643066
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,32767,0.11949280500411988
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,32767,0.06849279999732971
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,32767,0.08530240058898926
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,32767,0.06130719780921936
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,32767,0.060868799686431885
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,32767,0.060684800148010254
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,32767,0.059575998783111574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,32767,0.06056320071220398
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,65535,0.227345609664917
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,65535,0.21753759384155275
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,65535,0.3216048002243042
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,65535,0.21869280338287353
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,65535,0.21721599102020264
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,65535,0.2161792039871216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,65535,0.21713919639587403
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,65535,0.11522079706192016
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,65535,0.15677119493484498
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,65535,0.10396159887313842
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,65535,0.10251519680023194
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,65535,0.10234240293502808
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,65535,0.10206880569458007
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,65535,0.10151840448379516
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,131071,0.42882399559020995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,131071,0.40697760581970216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,131071,0.6303999900817872
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,131071,0.4038656234741211
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,131071,0.4068319797515869
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,131071,0.40446882247924804
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,131071,0.20692799091339112
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,131071,0.40674400329589844
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,131071,0.18655359745025635
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,131071,0.18638240098953246
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,131071,0.2991631984710693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,131071,0.18456640243530273
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,1,0.014640000462532044
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,1,0.012619200348854064
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,1,0.012612800300121307
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,1,0.01247519999742508
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,1,0.012428800016641617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,1,0.012558400630950928
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,1,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,131071,0.18484480381011964
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,131071,0.18324960470199586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,1,0.014431999623775482
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,1,0.012624000012874604
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,1,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,1,0.012535999715328216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,1,0.012515200674533844
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,1,0.012406399846076966
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,1,0.012427199631929398
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,3,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,3,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,3,0.012585599720478059
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,3,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,3,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,3,0.012510399520397186
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,3,0.012532800436019897
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,3,0.012544000148773193
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,3,0.012430399656295776
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,3,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,3,0.012515200674533844
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,3,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,3,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,3,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,7,0.014446400105953217
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,7,0.012608000636100769
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,7,0.012534399330615998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,7,0.012439999729394913
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,7,0.012622399628162384
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,7,0.012438400089740754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,7,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,7,0.014468799531459808
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,7,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,7,0.012432000041007996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,7,0.010548800230026245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,7,0.012454400211572647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,7,0.010569600015878677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,7,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,15,0.014641599357128143
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,15,0.012510399520397186
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,15,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,15,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,15,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,15,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,15,0.012606400251388549
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,15,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,15,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,15,0.012329600006341934
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,15,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,15,0.01058880016207695
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,15,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,15,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,31,0.014564800262451171
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,31,0.012636800110340119
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,31,0.012518399953842163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,31,0.012558400630950928
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,31,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,31,0.012492799758911132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,31,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,31,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,31,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,31,0.012399999797344208
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,31,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,31,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,31,0.011644800007343293
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,31,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,63,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,63,0.014238399267196656
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,63,0.012438400089740754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,63,0.012518399953842163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,63,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,63,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,63,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,63,0.012628799676895142
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,63,0.012417600303888322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,63,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,63,0.011615999788045884
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,63,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,63,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,63,0.011617600172758102
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,127,0.01459999978542328
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,127,0.01255200058221817
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,127,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,127,0.01244639977812767
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,127,0.012540799379348756
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,127,0.012534399330615998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,127,0.01247360035777092
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,127,0.013835200667381286
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,127,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,127,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,127,0.012454400211572647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,127,0.0112527996301651
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,127,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,127,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,255,0.014548799395561219
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,255,0.012567999958992004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,255,0.012598399817943574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,255,0.01250240057706833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,255,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,255,0.012671999633312225
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,255,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,255,0.012670400738716125
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,255,0.012556800246238708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,255,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,255,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,255,0.012337599694728852
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,255,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,255,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,511,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,511,0.018695999681949616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,511,0.014614400267601014
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,511,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,511,0.01525920033454895
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,511,0.01459999978542328
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,511,0.01448799967765808
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,511,0.014563199877738953
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,511,0.01257600039243698
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,511,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,511,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,511,0.012518399953842163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,511,0.012452799826860428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,511,0.01255200058221817
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,1023,0.024864000082015992
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,1023,0.01924320012331009
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,1023,0.016673600673675536
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,1023,0.016627199947834015
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,1023,0.016595199704170227
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,1023,0.016599999368190767
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,1023,0.016590400040149687
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,1023,0.018592000007629395
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,1023,0.01666879951953888
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,1023,0.014539200067520141
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,1023,0.014528000354766845
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,1023,0.014561599493026734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,1023,0.014555199444293976
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,1023,0.014535999298095703
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,2047,0.02677919864654541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,2047,0.028681600093841554
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,2047,0.020732800662517547
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,2047,0.020588800311088562
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,2047,0.018680000305175783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,2047,0.020287999510765077
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,2047,0.01918399930000305
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,2047,0.020695999264717102
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,2047,0.020683200657367708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,2047,0.016579200327396394
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,2047,0.014715200662612915
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,2047,0.014612799882888794
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,2047,0.014571200311183929
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,2047,0.014616000652313232
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,4095,0.04944480061531067
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,4095,0.055190402269363406
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,4095,0.04195840060710907
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,4095,0.04186719954013825
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,4095,0.04174239933490753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,4095,0.042519998550415036
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,4095,0.041289600729942325
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,4095,0.026849600672721862
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,4095,0.0290367990732193
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,4095,0.020716799795627593
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,4095,0.02006240040063858
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,4095,0.020367999374866486
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,4095,0.01884160041809082
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,4095,0.019648000597953796
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,8191,0.07624160051345825
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,8191,0.06874399781227111
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,8191,0.09312959909439086
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,8191,0.0694383978843689
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,8191,0.06700959801673889
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,8191,0.06667680144309998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,8191,0.06873760223388672
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,8191,0.04567520022392273
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,8191,0.05141760110855102
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,8191,0.03776639997959137
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,8191,0.037067198753356935
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,8191,0.0366351991891861
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,8191,0.03617120087146759
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,8191,0.03617919981479645
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,16383,0.13132959604263306
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,16383,0.17048799991607666
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,16383,0.12248159646987915
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,16383,0.12095520496368409
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,16383,0.11971520185470581
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,16383,0.11956640481948852
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,16383,0.12077280282974243
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,16383,0.0703935980796814
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,16383,0.08621919751167298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,16383,0.05985119938850403
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,16383,0.05906080007553101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,16383,0.05839040279388428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,16383,0.05874720215797424
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,16383,0.05806080102920532
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,32767,0.23632960319519042
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,32767,0.22273919582366944
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,32767,0.32348480224609377
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,32767,0.22019519805908203
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,32767,0.2217616081237793
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,32767,0.2156303882598877
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,32767,0.22089920043945313
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,32767,0.15964640378952027
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,32767,0.12024799585342408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,32767,0.10290720462799072
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,32767,0.10257279872894287
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,32767,0.10335839986801147
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,32767,0.10185279846191406
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,32767,0.10229279994964599
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,65535,0.44295201301574705
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,65535,0.4244832038879395
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,65535,0.6281216144561768
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,65535,0.41985602378845216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,65535,0.4207327842712402
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,65535,0.42255520820617676
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,65535,0.2175136089324951
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,65535,0.4192240238189697
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,65535,0.2990351915359497
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,65535,0.19141279458999633
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,65535,0.1929808020591736
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,65535,0.19135199785232543
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,1,0.014473600685596466
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,1,0.014619199931621552
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,65535,0.19054239988327026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,65535,0.1905408024787903
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,1,0.012614400684833526
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,1,0.012574400007724761
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,1,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,1,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,1,0.012580800056457519
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,1,0.014324800670146942
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,1,0.012561599910259246
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,1,0.012720000743865967
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,1,0.01239520013332367
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,1,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,1,0.012430399656295776
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,1,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,3,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,3,0.014548799395561219
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,3,0.013950400054454803
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,3,0.012540799379348756
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,3,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,3,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,3,0.012507200241088867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,3,0.012604799866676331
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,3,0.013412800431251527
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,3,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,3,0.012403199821710587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,3,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,3,0.011260800063610077
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,3,0.0117807999253273
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,7,0.014550399780273438
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,7,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,7,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,7,0.012508800625801087
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,7,0.012622399628162384
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,7,0.012539200484752655
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,7,0.012535999715328216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,7,0.012614400684833526
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,7,0.012593600153923034
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,7,0.012433599680662155
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,7,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,7,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,7,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,7,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,15,0.014526399970054626
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,15,0.014579200744628906
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,15,0.014425599575042724
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,15,0.012620800733566284
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,15,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,15,0.01265760064125061
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,15,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,15,0.012806400656700134
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,15,0.012545600533485413
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,15,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,15,0.01239359974861145
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,15,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,15,0.011478400230407715
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,15,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,31,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,31,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,31,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,31,0.014451199769973755
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,31,0.012513600289821625
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,31,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,31,0.012614400684833526
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,31,0.014396800100803376
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,31,0.012545600533485413
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,31,0.012513600289821625
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,31,0.01244800016283989
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,31,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,31,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,31,0.012392000108957291
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,63,0.014617599546909332
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,63,0.014640000462532044
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,63,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,63,0.012665599584579468
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,63,0.012571200728416443
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,63,0.012593600153923034
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,63,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,63,0.012619200348854064
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,63,0.012534399330615998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,63,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,63,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,63,0.012443199753761292
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,63,0.010856000334024429
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,63,0.01247519999742508
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,127,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,127,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,127,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,127,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,127,0.012571200728416443
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,127,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,127,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,127,0.012651200592517852
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,127,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,127,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,127,0.012433599680662155
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,127,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,127,0.012409599870443344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,127,0.012406399846076966
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,255,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,255,0.014545600116252898
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,255,0.014427199959754944
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,255,0.012681600451469422
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,255,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,255,0.012566399574279786
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,255,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,255,0.014472000300884247
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,255,0.013844799995422364
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,255,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,255,0.012409599870443344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,255,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,255,0.010574399679899215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,255,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,511,0.02268480062484741
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,511,0.01863519996404648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,511,0.01661919951438904
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,511,0.016308799386024475
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,511,0.01643519997596741
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,511,0.014628799259662628
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,511,0.01653439998626709
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,511,0.020606400072574617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,511,0.014574399590492249
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,511,0.014486399292945863
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,511,0.012617599964141846
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,511,0.012652799487113953
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,511,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,511,0.012779200077056884
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,1023,0.026895999908447266
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,1023,0.028923198580741882
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,1023,0.020670400559902193
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,1023,0.018648000061511995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,1023,0.018607999384403228
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,1023,0.018673600256443025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,1023,0.018804800510406495
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,1023,0.022737599909305573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,1023,0.018694399297237395
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,1023,0.017107200622558594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,1023,0.014636799693107605
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,1023,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,1023,0.014711999893188476
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,1023,0.014528000354766845
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,2047,0.05141760110855102
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,2047,0.04351840019226074
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,2047,0.0547760009765625
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,2047,0.04189279973506928
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,2047,0.04105759859085083
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,2047,0.04142560064792633
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,2047,0.041289600729942325
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,2047,0.02887200117111206
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,2047,0.020665599405765532
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,2047,0.028839999437332155
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,2047,0.0186256006360054
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,2047,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,2047,0.018779200315475465
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,2047,0.0186256006360054
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,4095,0.07840800285339355
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,4095,0.09233279824256897
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,4095,0.06881920099258423
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,4095,0.06704800128936768
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,4095,0.06859359741210938
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,4095,0.06568639874458312
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,4095,0.06642879843711853
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,4095,0.04796000123023987
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,4095,0.049830400943756105
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,4095,0.03903839886188507
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,4095,0.03703039884567261
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,4095,0.03707039952278137
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,4095,0.0368800014257431
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,4095,0.036774399876594546
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,8191,0.13354560136795043
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,8191,0.12379200458526611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,8191,0.1679792046546936
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,8191,0.1217919945716858
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,8191,0.12153600454330445
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,8191,0.11850240230560302
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,8191,0.11998720169067383
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,8191,0.07339519858360291
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,8191,0.08518720269203187
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,8191,0.061292797327041626
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,8191,0.059832000732421876
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,8191,0.05799199938774109
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,8191,0.05869920253753662
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,8191,0.05898399949073792
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,16383,0.23820641040802001
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,16383,0.22365920543670653
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,16383,0.3170399904251099
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,16383,0.21805760860443116
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,16383,0.22135839462280274
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,16383,0.21792960166931152
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,16383,0.12123839855194092
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,16383,0.22139999866485596
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,16383,0.15632319450378418
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,16383,0.10418879985809326
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,16383,0.10342880487442016
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,16383,0.10270240306854247
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,16383,0.10156160593032837
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,16383,0.10271680355072021
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,32767,0.44986400604248045
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,32767,0.4210559844970703
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,32767,0.6204671859741211
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,32767,0.42054238319396975
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,32767,0.41876959800720215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,32767,0.4209536075592041
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,32767,0.42150239944458007
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,32767,0.22202880382537843
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,32767,0.2928463935852051
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,32767,0.191867196559906
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,32767,0.1946336030960083
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,32767,0.1902608036994934
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,1,0.016686399281024934
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,1,0.020716799795627593
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,32767,0.19121600389480592
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,32767,0.19057120084762574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,1,0.014552000164985656
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,1,0.014558400213718414
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,1,0.014444799721240997
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,1,0.01467680037021637
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,1,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,1,0.016579200327396394
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,1,0.012590399384498597
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,1,0.0185248002409935
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,1,0.012667199969291687
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,1,0.012438400089740754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,1,0.012857599556446076
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,1,0.012478400021791458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,3,0.016612799465656282
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,3,0.014535999298095703
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,3,0.020878399908542632
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,3,0.014470399916172027
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,3,0.014457599818706512
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,3,0.014571200311183929
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,3,0.014528000354766845
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,3,0.01652960032224655
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,3,0.01836480051279068
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,3,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,3,0.012558400630950928
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,3,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,3,0.012539200484752655
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,3,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,7,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,7,0.020665599405765532
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,7,0.014864000678062438
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,7,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,7,0.014548799395561219
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,7,0.014455999433994293
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,7,0.014556799829006196
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,7,0.01653600037097931
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,7,0.018513600528240203
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,7,0.014679999649524688
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,7,0.012494400143623352
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,7,0.012430399656295776
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,7,0.012531200051307678
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,7,0.012454400211572647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,15,0.016659200191497803
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,15,0.014716799557209014
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,15,0.020670400559902193
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,15,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,15,0.01462559998035431
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,15,0.014451199769973755
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,15,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,15,0.01664479970932007
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,15,0.01860480010509491
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,15,0.012649600207805634
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,15,0.01252480000257492
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,15,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,15,0.012539200484752655
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,15,0.01239520013332367
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,31,0.016630400717258454
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,31,0.02069759964942932
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,31,0.014662399888038635
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,31,0.014697599411010741
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,31,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,31,0.014555199444293976
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,31,0.014633600413799287
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,31,0.016523200273513793
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,31,0.018326400220394133
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,31,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,31,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,31,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,31,0.01252480000257492
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,31,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,63,0.016543999314308167
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,63,0.02080959975719452
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,63,0.014553600549697876
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,63,0.01446399986743927
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,63,0.014611199498176575
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,63,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,63,0.014528000354766845
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,63,0.016505600512027742
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,63,0.0186271995306015
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,63,0.012392000108957291
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,63,0.012548799812793731
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,63,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,63,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,63,0.01247360035777092
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,127,0.016625599563121797
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,127,0.020923200249671935
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,127,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,127,0.014542399346828461
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,127,0.014617599546909332
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,127,0.014448000490665436
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,127,0.014564800262451171
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,127,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,127,0.018423999845981597
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,127,0.012569600343704223
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,127,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,127,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,127,0.012414400279521943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,127,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,255,0.018694399297237395
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,255,0.02083519995212555
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,255,0.014535999298095703
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,255,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,255,0.014480000734329224
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,255,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,255,0.014678399264812469
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,255,0.018320000171661376
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,255,0.01860000044107437
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,255,0.012545600533485413
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,255,0.01263200044631958
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,255,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,255,0.01244800016283989
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,255,0.012534399330615998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,511,0.026819199323654175
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,511,0.031040000915527343
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,511,0.01932159960269928
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,511,0.016843199729919434
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,511,0.01854880005121231
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,511,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,511,0.01663679927587509
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,511,0.020615999400615693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,511,0.022111999988555908
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,511,0.01454399973154068
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,511,0.014467200636863709
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,511,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,511,0.014609600603580474
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,511,0.014427199959754944
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,1023,0.049107199907302855
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,1023,0.05596799850463867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,1023,0.04121919870376587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,1023,0.04225600063800812
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,1023,0.04137920141220093
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,1023,0.04030719995498657
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,1023,0.04081760048866272
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,1023,0.025702399015426636
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,1023,0.03140000104904175
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,1023,0.018624000251293182
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,1023,0.018598400056362152
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,1023,0.018592000007629395
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,1023,0.01860159933567047
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,1023,0.018638400733470915
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,2047,0.07511360049247742
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,2047,0.06705120205879211
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,2047,0.09511039853096008
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,2047,0.06600000262260437
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,2047,0.06643199920654297
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,2047,0.06511359810829162
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,2047,0.06544640064239501
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,2047,0.045798400044441225
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,2047,0.05197759866714478
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,2047,0.03702400028705597
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,2047,0.035462400317192076
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,2047,0.03524479866027832
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,2047,0.036427199840545654
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,2047,0.035068801045417784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,4095,0.13414080142974855
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,4095,0.17352800369262694
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,4095,0.12041280269622803
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,4095,0.1208191990852356
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,4095,0.11615999937057495
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,4095,0.11948000192642212
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,4095,0.11947200298309327
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,4095,0.07081120014190674
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,4095,0.08924800157546997
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,4095,0.059671998023986816
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,4095,0.05834720134735107
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,4095,0.05743039846420288
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,4095,0.057764798402786255
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,4095,0.0576911985874176
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,8191,0.23035840988159179
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,8191,0.2219696044921875
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,8191,0.3255120038986206
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,8191,0.21509120464324952
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,8191,0.22058560848236083
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,8191,0.2184272050857544
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,8191,0.1199455976486206
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,8191,0.21375679969787598
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,8191,0.1598207950592041
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,8191,0.10360959768295289
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,8191,0.10268800258636475
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,8191,0.1015887975692749
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,8191,0.10132639408111573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,8191,0.10090080499649048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,16383,0.43773279190063474
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,16383,0.41724162101745604
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,16383,0.6290783882141113
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,16383,0.41774559020996094
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,16383,0.41290721893310545
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,16383,0.4119552135467529
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,16383,0.2201904058456421
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,16383,0.41691999435424804
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,16383,0.3010096073150635
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,16383,0.1924399971961975
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,16383,0.19059040546417236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,16383,0.18999680280685424
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,1,0.024993599951267244
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,16383,0.19083039760589598
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,16383,0.1893504023551941
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,1,0.031339201331138614
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,1,0.020713600516319274
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,1,0.018676799535751343
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,1,0.018783999979496
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,1,0.02069920003414154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,1,0.01958719938993454
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,1,0.024137599766254424
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,1,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,1,0.02707360088825226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,1,0.01674399971961975
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,1,0.01664000004529953
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,1,0.0168272003531456
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,1,0.016487999260425566
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,3,0.024820800125598907
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,3,0.031062400341033934
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,3,0.020795199275016784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,3,0.020550400018692017
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,3,0.019551999866962433
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,3,0.020824000239372253
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,3,0.020422400534152986
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,3,0.023633599281311035
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,3,0.018716800212860107
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,3,0.026892799139022826
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,3,0.016711999475955964
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,3,0.016590400040149687
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,3,0.016771200299263
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,3,0.016601599752902985
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,7,0.024772800505161285
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,7,0.03126400113105774
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,7,0.0208079993724823
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,7,0.020015999674797058
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,7,0.02067520022392273
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,7,0.020553599298000335
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,7,0.01937440037727356
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,7,0.0247311994433403
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,7,0.026841598749160766
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,7,0.018671999871730804
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,7,0.016631999611854555
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,7,0.016711999475955964
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,7,0.016657599806785585
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,7,0.016471999883651733
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,15,0.02489439994096756
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,15,0.031014400720596313
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,15,0.020982399582862854
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,15,0.020720000565052032
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,15,0.02069920003414154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,15,0.02081120014190674
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,15,0.020582400262355804
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,15,0.02479040026664734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,15,0.018670399487018586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,15,0.026868799328804018
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,15,0.016620799899101257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,15,0.016672000288963318
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,15,0.01664000004529953
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,15,0.016564799845218657
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,31,0.02479359954595566
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,31,0.020707200467586517
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,31,0.03136959969997406
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,31,0.02069759964942932
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,31,0.02003040015697479
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,31,0.02072799950838089
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,31,0.0205935999751091
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,31,0.02468159943819046
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,31,0.026807999610900878
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,31,0.01881919950246811
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,31,0.01653279960155487
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,31,0.016539199650287627
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,31,0.016616000235080718
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,31,0.01648000031709671
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,63,0.024792000651359558
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,63,0.0208064004778862
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,63,0.031430399417877196
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,63,0.020657600462436677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,63,0.02067199945449829
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,63,0.020494399964809416
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,63,0.02064799964427948
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,63,0.024758400022983552
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,63,0.026833599805831908
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,63,0.01871040016412735
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,63,0.01655679941177368
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,63,0.01658399999141693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,63,0.01658399999141693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,63,0.016603200137615202
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,127,0.025806400179862975
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,127,0.03143840134143829
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,127,0.020875200629234314
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,127,0.020073600113391876
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,127,0.02080159932374954
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,127,0.020659199357032774
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,127,0.02014400064945221
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,127,0.02296479940414429
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,127,0.02690880000591278
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,127,0.018614399433135986
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,127,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,127,0.01672320067882538
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,127,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,127,0.016638399660587312
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,255,0.037049600481987
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,255,0.03586879968643188
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,255,0.022300800681114195
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,255,0.020947200059890748
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,255,0.020078399777412416
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,255,0.020550400018692017
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,255,0.020678399503231047
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,255,0.02682720124721527
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,255,0.026787200570106508
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,255,0.018606400489807128
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,255,0.016612799465656282
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,255,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,255,0.01674399971961975
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,255,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,511,0.05344319939613342
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,511,0.05968480110168457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,511,0.04320479929447174
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,511,0.04239520132541656
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,511,0.04141440093517303
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,511,0.04142400026321411
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,511,0.04116480052471161
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,511,0.03824479877948761
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,511,0.03667680025100708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,511,0.022779199481010436
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,511,0.020715199410915375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,511,0.020656000077724456
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,511,0.020556800067424774
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,511,0.02007199972867966
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,1023,0.08224480152130127
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,1023,0.09865279793739319
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,1023,0.06888160109519958
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,1023,0.06649760007858277
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,1023,0.06796000003814698
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,1023,0.06608800292015075
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,1023,0.06596800088882446
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,1023,0.05602239966392517
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,1023,0.0531711995601654
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,1023,0.03923520147800445
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,1023,0.037088000774383546
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,1023,0.03724960088729858
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,1023,0.03683519959449768
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,1023,0.03594239950180054
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,2047,0.13732160329818727
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,2047,0.17742559909820557
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,2047,0.1213871955871582
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,2047,0.11876159906387329
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,2047,0.11588319540023803
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,2047,0.11799039840698242
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,2047,0.11628799438476563
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,2047,0.08195520043373108
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,2047,0.09397760033607483
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,2047,0.06406880021095276
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,2047,0.061470401287078855
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,2047,0.06023039817810059
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,2047,0.05963519811630249
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,2047,0.05952479839324951
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,4095,0.24618399143218994
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,4095,0.33114399909973147
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,4095,0.2276240110397339
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,4095,0.2248431921005249
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,4095,0.22341599464416503
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,4095,0.22278881072998047
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,4095,0.22195839881896973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,4095,0.13181439638137818
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,4095,0.16781280040740967
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,4095,0.10843839645385742
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,4095,0.10629119873046874
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,4095,0.10518239736557007
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,4095,0.10462720394134521
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,4095,0.10474079847335815
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,8191,0.4571216106414795
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,8191,0.428056001663208
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,8191,0.6318448066711426
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,8191,0.43060641288757323
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,8191,0.42258400917053224
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,8191,0.2307487964630127
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,8191,0.42720799446105956
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,8191,0.42080321311950686
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,8191,0.3101855993270874
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,8191,0.19724639654159545
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,8191,0.194377601146698
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,8191,0.1942960023880005
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,1,0.04323039948940277
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,1,0.051425600051879884
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,8191,0.1923743963241577
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,1,0.03215999901294708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,8191,0.19217599630355836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,1,0.029017600417137145
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,1,0.028963199257850646
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,1,0.028974398970603943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,1,0.030895999073982237
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,1,0.03920319974422455
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,1,0.02693440020084381
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,1,0.043227198719978335
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,1,0.024775999784469604
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,1,0.024505600333213806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,1,0.023343999683856965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,1,0.022734400629997254
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,3,0.04343999922275543
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,3,0.05138400197029114
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,3,0.03292160034179688
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,3,0.02910720109939575
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,3,0.02924480140209198
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,3,0.02898080050945282
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,3,0.028923198580741882
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,3,0.03915359973907471
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,3,0.04325760006904602
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,3,0.026903998851776124
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,3,0.02470880001783371
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,3,0.02290560007095337
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,3,0.02284960001707077
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,3,0.022735999524593355
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,7,0.04367200136184692
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,7,0.05142880082130432
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,7,0.03294720053672791
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,7,0.030907198786735535
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,7,0.030928000807762146
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,7,0.030899199843406677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,7,0.030884799361228944
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,7,0.043252798914909366
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,7,0.03912639915943146
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,7,0.026977598667144775
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,7,0.024864000082015992
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,7,0.022808000445365906
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,7,0.024435199797153473
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,7,0.022937600314617158
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,15,0.04337120056152344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,15,0.05148640275001526
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,15,0.032979199290275575
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,15,0.030910399556159974
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,15,0.03081600069999695
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,15,0.02895520031452179
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,15,0.03081600069999695
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,15,0.03907999992370605
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,15,0.04321280121803284
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,15,0.026984000205993654
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,15,0.024732799828052522
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,15,0.022788800299167633
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,15,0.022870400547981264
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,15,0.022785599529743194
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,31,0.04529440104961395
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,31,0.052297598123550414
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,31,0.03296799957752228
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,31,0.03073599934577942
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,31,0.03087199926376343
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,31,0.030907198786735535
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,31,0.030964800715446474
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,31,0.04107680022716522
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,31,0.043188801407814024
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,31,0.026815998554229736
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,31,0.02476000040769577
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,31,0.02465279996395111
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,31,0.022767999768257143
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,31,0.022891199588775633
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,63,0.03288320004940033
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,63,0.04538719952106476
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,63,0.05488319993019104
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,63,0.030888000130653383
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,63,0.030854400992393494
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,63,0.03091680109500885
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,63,0.030955201387405394
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,63,0.043222400546073916
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,63,0.041193601489067075
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,63,0.027169600129127502
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,63,0.02476319968700409
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,63,0.02484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,63,0.02271520048379898
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,63,0.023363199830055238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,127,0.05589600205421448
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,127,0.04924159944057464
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,127,0.03504799902439117
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,127,0.030899199843406677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,127,0.03094559907913208
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,127,0.030868801474571227
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,127,0.030935999751091004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,127,0.041403201222419736
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,127,0.044607999920845035
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,127,0.0268528014421463
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,127,0.024779200553894043
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,127,0.023183999955654143
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,127,0.022761599719524385
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,127,0.02276960015296936
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,255,0.06550400257110596
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,255,0.0608240008354187
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,255,0.04730400145053863
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,255,0.04275999963283539
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,255,0.042985600233078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,255,0.041171199083328246
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,255,0.04184800088405609
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,255,0.05042880177497864
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,255,0.047363200783729555
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,255,0.028913599252700806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,255,0.02476319968700409
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,255,0.024715200066566467
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,255,0.022777600586414336
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,255,0.02473759949207306
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,511,0.09542239904403686
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,511,0.07510560154914855
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,511,0.09826080203056335
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,511,0.07225760221481323
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,511,0.07111520171165467
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,511,0.06892160177230836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,511,0.07044320106506348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,511,0.06505600214004517
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,511,0.062136000394821166
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,511,0.04412479996681214
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,511,0.041247999668121337
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,511,0.03928320109844208
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,511,0.03922719955444336
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,511,0.039139199256896975
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,1023,0.14838080406188964
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,1023,0.12730879783630372
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,1023,0.16792160272598267
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,1023,0.12120800018310547
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,1023,0.12240480184555054
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,1023,0.11965759992599487
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,1023,0.11910719871520996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,1023,0.0940783977508545
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,1023,0.09387680292129516
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,1023,0.06772639751434326
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,1023,0.06350719928741455
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,1023,0.06326720118522644
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,1023,0.06187360286712647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,1023,0.06170880198478699
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,2047,0.2593280076980591
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,2047,0.2290656089782715
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,2047,0.30645439624786375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,2047,0.22295041084289552
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,2047,0.22259678840637206
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,2047,0.21808319091796874
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,2047,0.21967680454254152
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,2047,0.14708640575408935
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,2047,0.16057440042495727
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,2047,0.11488959789276124
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,2047,0.1106160044670105
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,2047,0.10813599824905396
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,2047,0.10801759958267212
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,2047,0.10800800323486329
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,4095,0.48935999870300295
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,4095,0.5771408081054688
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,4095,0.4488239765167236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,4095,0.4408256053924561
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,4095,0.4400320053100586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,4095,0.4396815776824951
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,4095,0.206276798248291
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,4095,0.2494271993637085
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,4095,0.2898000001907349
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,4095,0.4370736122131348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,4095,0.2006864070892334
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,4095,0.19725919961929322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,4095,0.1978991985321045
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,4095,0.19924639463424682
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,1,0.07602720260620117
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,1,0.05348960161209106
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,1,0.04833599925041199
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,1,0.09620159864425659
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,1,0.04738239943981171
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,1,0.04741120040416717
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,1,0.0477295994758606
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,1,0.07141119837760926
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,1,0.07822719812393189
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,1,0.04525760114192963
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,1,0.037036800384521486
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,1,0.03716480135917664
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,1,0.03912160098552704
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,1,0.035278400778770445
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,3,0.048583999276161194
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,3,0.07670720219612122
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,3,0.09646400213241577
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,3,0.0534608006477356
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,3,0.04735200107097626
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,3,0.0473904013633728
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,3,0.04735200107097626
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,3,0.07056959867477416
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,3,0.04521760046482086
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,3,0.03906719982624054
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,3,0.07801120281219483
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,3,0.037089601159095764
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,3,0.03710559904575348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,3,0.037003201246261594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,7,0.07627679705619812
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,7,0.09669439792633057
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,7,0.053478401899337766
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,7,0.04939039945602417
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,7,0.047310400009155276
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,7,0.04736480116844177
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,7,0.04756959974765777
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,7,0.07191519737243653
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,7,0.04530400037765503
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,7,0.07814720273017883
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,7,0.03909280002117157
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,7,0.03717440068721771
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,7,0.03699679970741272
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,7,0.03705919981002807
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,15,0.07625439763069153
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,15,0.05360320210456848
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,15,0.09703840017318725
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,15,0.0493151992559433
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,15,0.04738560020923614
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,15,0.047312000393867494
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,15,0.047366398572921756
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,15,0.0718608021736145
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,15,0.0781328022480011
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,15,0.04524959921836853
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,15,0.03903680145740509
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,15,0.037092798948287965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,15,0.036976000666618346
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,15,0.0369024008512497
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,31,0.07608799934387207
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,31,0.0969760000705719
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,31,0.0554144024848938
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,31,0.049435201287269595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,31,0.04737919867038727
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,31,0.04888960123062134
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,31,0.04731520116329193
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,31,0.07195199728012085
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,31,0.07852960228919983
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,31,0.04525440037250519
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,31,0.03915199935436249
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,31,0.03711679875850678
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,31,0.03694559931755066
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,31,0.03710080087184906
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,63,0.07724959850311279
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,63,0.0965712010860443
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,63,0.05773280262947082
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,63,0.050625598430633544
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,63,0.04814879894256592
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,63,0.04930880069732666
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,63,0.04787200093269348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,63,0.08019199967384338
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,63,0.07200639843940734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,63,0.045347198843955994
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,63,0.03910079896450043
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,63,0.03707039952278137
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,63,0.0370959997177124
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,63,0.03701440095901489
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,127,0.08649920225143433
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,127,0.09913920164108277
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,127,0.06574079990386963
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,127,0.057076799869537356
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,127,0.05547999739646912
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,127,0.05537279844284058
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,127,0.0540831983089447
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,127,0.07234240174293519
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,127,0.08054879903793336
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,127,0.04824320077896118
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,127,0.039129599928855896
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,127,0.03714720010757446
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,127,0.03706560134887695
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,127,0.03652639985084534
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,255,0.11317119598388672
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,255,0.10744160413742065
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,255,0.07616000175476074
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,255,0.07091839909553528
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,255,0.06812959909439087
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,255,0.06574400067329407
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,255,0.06772800087928772
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,255,0.08526239991188049
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,255,0.0792688012123108
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,255,0.051497602462768556
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,255,0.045259198546409606
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,255,0.042208001017570496
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,255,0.04236319959163666
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,255,0.04250720143318176
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,511,0.13130559921264648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,511,0.16074880361557006
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,511,0.1804960012435913
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,511,0.12306720018386841
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,511,0.11733920574188232
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,511,0.11590720415115356
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,511,0.11851199865341186
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,511,0.10938400030136108
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,511,0.1092303991317749
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,511,0.0722815990447998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,511,0.0665120005607605
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,511,0.06298879981040954
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,511,0.06242560148239136
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,511,0.061776000261306765
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,1023,0.2640079975128174
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,1023,0.22140319347381593
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,1023,0.2116015911102295
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,1023,0.31957600116729734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,1023,0.20905439853668212
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,1023,0.20990240573883057
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,1023,0.2062511920928955
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,1023,0.15979199409484862
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,1023,0.11610239744186401
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,1023,0.17462879419326782
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,1023,0.10872639417648315
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,1023,0.10558719635009765
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,1023,0.10443359613418579
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,1023,0.10317280292510986
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,2047,0.46430239677429197
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,2047,0.5995200157165528
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,2047,0.41083359718322754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,2047,0.4013040065765381
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,2047,0.39842400550842283
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,2047,0.3955120086669922
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,2047,0.39351840019226075
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,2047,0.2598304033279419
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,2047,0.3039360046386719
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,2047,0.19446719884872438
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,2047,0.20294721126556398
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,2047,0.19062399864196777
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,1,0.011030399799346923
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,1,0.011265599727630615
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,1,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,2047,0.18977440595626832
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,2047,0.18656320571899415
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,1,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,1,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,1,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,1,0.010388799756765366
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,1,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,1,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,1,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,1,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,1,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,1,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,1,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,3,0.012432000041007996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,3,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,3,0.012415999919176102
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,3,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,3,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,3,0.010572800040245056
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,3,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,3,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,3,0.010567999631166457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,3,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,3,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,3,0.010366400331258773
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,3,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,3,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,7,0.012390399724245072
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,7,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,7,0.012534399330615998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,7,0.010595200210809707
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,7,0.010396800190210342
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,7,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,7,0.010553599894046783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,7,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,7,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,7,0.010558400303125381
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,7,0.010396800190210342
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,7,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,7,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,15,0.01242400035262108
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,15,0.011044800281524658
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,7,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,15,0.01106560006737709
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,15,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,15,0.0103472001850605
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,15,0.010360000282526016
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,15,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,15,0.010598400235176086
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,15,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,15,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,15,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,15,0.010299199819564819
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,15,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,31,0.011128000169992446
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,15,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,31,0.012406399846076966
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,31,0.012371200323104858
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,31,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,31,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,31,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,31,0.010375999659299851
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,31,0.01061599999666214
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,31,0.010553599894046783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,31,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,31,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,31,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,31,0.010564800351858139
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,31,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,63,0.012401600182056428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,63,0.012388800084590913
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,63,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,63,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,63,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,63,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,63,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,63,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,63,0.010539200156927109
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,63,0.010558400303125381
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,63,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,63,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,63,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,63,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,127,0.011988800019025803
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,127,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,127,0.012375999987125397
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,127,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,127,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,127,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,127,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,127,0.01055999994277954
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,127,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,127,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,127,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,127,0.010387200117111205
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,127,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,255,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,127,0.01101119965314865
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,255,0.012510399520397186
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,255,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,255,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,255,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,255,0.010388799756765366
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,255,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,255,0.01064639985561371
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,255,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,255,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,255,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,255,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,255,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,255,0.010353600233793258
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,511,0.012540799379348756
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,511,0.013801600039005279
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,511,0.014534400403499603
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,511,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,511,0.01244800016283989
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,511,0.012598399817943574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,511,0.012539200484752655
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,511,0.012404800206422806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,511,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,511,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,511,0.010630399733781815
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,511,0.012375999987125397
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,511,0.011486399918794632
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,511,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,1023,0.016540800034999848
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,1023,0.016577599942684172
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,1023,0.01658399999141693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,1023,0.014584000408649444
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,1023,0.014539200067520141
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,1023,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,1023,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,1023,0.014556799829006196
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,1023,0.014590400457382201
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,1023,0.014486399292945863
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,1023,0.014232000708580017
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,1023,0.0125231996178627
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,1023,0.012603199481964112
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,2047,0.016624000668525696
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,1023,0.012555199861526489
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,2047,0.01675360053777695
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,2047,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,2047,0.014548799395561219
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,2047,0.014638400077819825
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,2047,0.014531199634075165
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,2047,0.01454399973154068
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,2047,0.014708800613880158
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,2047,0.014588800072669984
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,2047,0.014561599493026734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,2047,0.01440960019826889
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,2047,0.012574400007724761
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,2047,0.012689599394798278
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,2047,0.012556800246238708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,4095,0.020656000077724456
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,4095,0.01847680062055588
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,4095,0.016622400283813475
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,4095,0.016572800278663636
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,4095,0.014593599736690522
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,4095,0.014563199877738953
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,4095,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,4095,0.018518400192260743
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,4095,0.016577599942684172
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,4095,0.014659200608730317
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,4095,0.014633600413799287
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,4095,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,4095,0.014475199580192565
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,4095,0.014483200013637542
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,8191,0.024532799422740937
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,8191,0.02528960108757019
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,8191,0.018748800456523895
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,8191,0.016571199893951415
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,8191,0.016569599509239197
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,8191,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,8191,0.01653439998626709
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,8191,0.020627200603485107
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,8191,0.01870719939470291
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,8191,0.016590400040149687
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,8191,0.016627199947834015
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,8191,0.014556799829006196
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,8191,0.014569599926471711
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,8191,0.014542399346828461
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,16383,0.02884320020675659
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,16383,0.03795199990272522
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,16383,0.020683200657367708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,16383,0.02689119875431061
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,16383,0.01992959976196289
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,16383,0.018662400543689728
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,16383,0.019860799610614776
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,16383,0.026807999610900878
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,16383,0.020793600380420683
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,16383,0.020751999318599702
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,16383,0.018724800646305086
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,16383,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,16383,0.018632000684738158
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,16383,0.01860480010509491
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,32767,0.03731200098991394
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,32767,0.032913601398468016
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,32767,0.031017601490020752
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,32767,0.02892639935016632
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,32767,0.02781279981136322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,32767,0.027108800411224366
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,32767,0.028782400488853454
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,32767,0.03099839985370636
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,32767,0.026833599805831908
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,32767,0.024820800125598907
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,32767,0.024799999594688416
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,32767,0.022703999280929567
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,32767,0.022753599286079406
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,32767,0.02274879962205887
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,65535,0.03712640106678009
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,65535,0.039320001006126405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,65535,0.043172800540924074
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,65535,0.03523840010166168
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,65535,0.03498240113258362
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,65535,0.033766400814056394
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,65535,0.033744001388549806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,65535,0.03300800025463104
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,65535,0.030961599946022034
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,65535,0.02887679934501648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,65535,0.02890239953994751
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,65535,0.026929599046707154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,65535,0.028863999247550964
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,65535,0.029078400135040282
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,131071,0.05613600015640259
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,131071,0.06230239868164063
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,131071,0.07241600155830383
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,131071,0.05446079969406128
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,131071,0.05204960107803345
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,131071,0.05550559759140015
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,131071,0.05576639771461487
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,131071,0.04118399918079376
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,131071,0.03917919993400574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,131071,0.03300319910049439
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,131071,0.0424560010433197
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,131071,0.033206400275230405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,131071,0.03298400044441223
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,131071,0.03319360017776489
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,1,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,1,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,1,0.012409599870443344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,1,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,1,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,1,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,1,0.010732799768447876
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,1,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,1,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,1,0.010788799822330475
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,1,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,1,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,1,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,1,0.010678400099277497
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,3,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,3,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,3,0.012580800056457519
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,3,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,3,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,3,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,3,0.010739199817180634
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,3,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,3,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,3,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,3,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,3,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,3,0.01061440035700798
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,7,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,3,0.01064160019159317
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,7,0.01197120025753975
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,7,0.011420799791812897
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,7,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,7,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,7,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,7,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,7,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,7,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,7,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,7,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,7,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,7,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,7,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,15,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,15,0.012417600303888322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,15,0.01162080019712448
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,15,0.010711999982595444
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,15,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,15,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,15,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,15,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,15,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,15,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,15,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,15,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,15,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,15,0.010702399909496308
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,31,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,31,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,31,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,31,0.012587200105190276
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,31,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,31,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,31,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,31,0.010742399841547012
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,31,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,31,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,31,0.01061440035700798
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,31,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,31,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,63,0.010620799660682679
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,63,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,31,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,63,0.010579200088977813
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,63,0.010785599797964096
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,63,0.010998400300741196
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,63,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,63,0.01066880002617836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,63,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,63,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,63,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,63,0.01077279970049858
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,63,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,63,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,63,0.010582400113344192
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,127,0.01231200024485588
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,127,0.012518399953842163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,127,0.012569600343704223
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,127,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,127,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,127,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,127,0.010611200332641601
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,127,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,127,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,127,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,127,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,127,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,127,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,127,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,255,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,255,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,255,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,255,0.012406399846076966
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,255,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,255,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,255,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,255,0.010520000010728836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,255,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,255,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,255,0.01051200032234192
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,255,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,255,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,511,0.014494399726390838
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,255,0.010505600273609162
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,511,0.014448000490665436
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,511,0.012531200051307678
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,511,0.01448799967765808
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,511,0.012544000148773193
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,511,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,511,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,511,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,511,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,511,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,511,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,511,0.012452799826860428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,511,0.010636799782514573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,511,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,1023,0.016443200409412384
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,1023,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,1023,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,1023,0.014596800506114959
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,1023,0.01255200058221817
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,1023,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,1023,0.013443200290203095
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,1023,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,1023,0.01364160031080246
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,1023,0.01305440068244934
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,1023,0.01244800016283989
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,1023,0.012588800489902496
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,1023,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,1023,0.012433599680662155
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,2047,0.017448000609874725
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,2047,0.016604800522327424
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,2047,0.01465120017528534
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,2047,0.014632000029087067
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,2047,0.01449279934167862
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,2047,0.012591999769210816
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,2047,0.01353919953107834
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,2047,0.014590400457382201
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,2047,0.014596800506114959
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,2047,0.012566399574279786
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,2047,0.012577599287033081
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,2047,0.012518399953842163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,2047,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,2047,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,4095,0.020550400018692017
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,4095,0.018632000684738158
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,4095,0.018568000197410582
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,4095,0.014686399698257446
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,4095,0.014449599385261535
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,4095,0.012582400441169738
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,4095,0.014407999813556671
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,4095,0.016630400717258454
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,4095,0.016502399742603303
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,4095,0.015440000593662262
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,4095,0.013622400164604188
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,4095,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,4095,0.012708799540996551
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,4095,0.012544000148773193
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,8191,0.03054560124874115
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,8191,0.02069759964942932
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,8191,0.018889600038528444
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,8191,0.016991999745368958
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,8191,0.0208624005317688
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,8191,0.01658560037612915
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,8191,0.017392000555992125
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,8191,0.02306399941444397
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,8191,0.018590399622917177
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,8191,0.016755199432373045
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,8191,0.014632000029087067
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,8191,0.0164560005068779
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,8191,0.018806399405002595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,8191,0.016275200247764587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,16383,0.026846399903297423
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,16383,0.033276799321174624
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,16383,0.02272319942712784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,16383,0.02069759964942932
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,16383,0.020734399557113647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,16383,0.021819199621677398
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,16383,0.01966560035943985
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,16383,0.024876800179481507
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,16383,0.018668800592422485
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,16383,0.024648000299930573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,16383,0.01881760060787201
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,16383,0.01857440024614334
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,16383,0.018982400000095368
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,16383,0.01852159947156906
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,32767,0.03111039996147156
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,32767,0.03717919886112213
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,32767,0.03705280125141144
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,32767,0.026800000667572023
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,32767,0.026774400472640993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,32767,0.026872000098228453
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,32767,0.02640959918498993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,32767,0.02743679881095886
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,32767,0.034031999111175534
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,32767,0.026843199133872987
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,32767,0.02476000040769577
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,32767,0.02510400116443634
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,32767,0.02272160053253174
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,32767,0.022884799540042876
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,65535,0.05917279720306397
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,65535,0.0517408013343811
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,65535,0.05631840229034424
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,65535,0.05183519721031189
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,65535,0.05283679962158203
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,65535,0.05502399802207947
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,65535,0.052832001447677614
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,65535,0.035046398639678955
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,65535,0.042052799463272096
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,65535,0.03075999915599823
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,65535,0.034222400188446044
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,65535,0.03083840012550354
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,65535,0.029318401217460634
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,65535,0.030348798632621764
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,131071,0.07865920066833496
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,131071,0.08449919819831848
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,131071,0.07697439789772034
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,131071,0.0940559983253479
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,131071,0.07557119727134705
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,131071,0.07910879850387573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,131071,0.07884320020675659
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,131071,0.05389279723167419
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,131071,0.05551519989967346
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,131071,0.05311999917030334
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,131071,0.04700640141963959
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,131071,0.046356800198554995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,1,0.012639999389648438
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,131071,0.047193598747253415
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,131071,0.04803360104560852
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,1,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,1,0.012425599992275238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,1,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,1,0.01143840029835701
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,1,0.01053439974784851
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,1,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,1,0.012567999958992004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,1,0.012203200161457062
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,1,0.010577599704265594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,1,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,1,0.010572800040245056
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,1,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,1,0.010593599826097488
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,3,0.012569600343704223
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,3,0.01242400035262108
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,3,0.012425599992275238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,3,0.012387199699878693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,3,0.012417600303888322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,3,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,3,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,3,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,3,0.012596799433231354
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,3,0.010576000064611435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,3,0.01072319969534874
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,3,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,3,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,3,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,7,0.012558400630950928
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,7,0.012433599680662155
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,7,0.012452799826860428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,7,0.012359999865293504
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,7,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,7,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,7,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,7,0.012606400251388549
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,7,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,7,0.010556799918413162
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,7,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,7,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,7,0.01056160032749176
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,7,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,15,0.012561599910259246
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,15,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,15,0.012428800016641617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,15,0.012569600343704223
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,15,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,15,0.010567999631166457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,15,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,15,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,15,0.012323199957609176
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,15,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,15,0.010539200156927109
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,15,0.010556799918413162
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,15,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,15,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,31,0.012646399438381195
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,31,0.012441600114107132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,31,0.012443199753761292
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,31,0.012441600114107132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,31,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,31,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,31,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,31,0.0124208003282547
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,31,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,31,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,31,0.010595200210809707
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,31,0.010552000254392624
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,31,0.010547199845314026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,63,0.01255359947681427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,31,0.010543999820947647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,63,0.01252799928188324
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,63,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,63,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,63,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,63,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,63,0.01056319996714592
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,63,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,63,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,63,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,63,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,63,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,63,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,63,0.01056160032749176
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,127,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,127,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,127,0.01249919980764389
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,127,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,127,0.01247519999742508
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,127,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,127,0.010505600273609162
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,127,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,127,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,127,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,127,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,127,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,127,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,127,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,255,0.012452799826860428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,255,0.012492799758911132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,255,0.01244800016283989
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,255,0.012411200255155564
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,255,0.012377600371837615
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,255,0.01058719977736473
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,255,0.010595200210809707
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,255,0.012374400347471236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,255,0.010585600137710571
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,255,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,255,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,255,0.01056160032749176
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,255,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,255,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,511,0.01470080018043518
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,511,0.014478400349617004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,511,0.014496000111103058
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,511,0.014448000490665436
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,511,0.012559999525547028
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,511,0.014459200203418732
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,511,0.012566399574279786
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,511,0.012630400061607362
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,511,0.012596799433231354
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,511,0.012460800260305405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,511,0.012399999797344208
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,511,0.012439999729394913
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,511,0.012427199631929398
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,511,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,1023,0.01672479957342148
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,1023,0.01642719954252243
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,1023,0.014616000652313232
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,1023,0.014691199362277984
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,1023,0.014564800262451171
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,1023,0.014459200203418732
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,1023,0.01268640011548996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,1023,0.014563199877738953
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,1023,0.014388799667358398
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,1023,0.012595200538635254
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,1023,0.012651200592517852
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,1023,0.012449599802494049
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,1023,0.012406399846076966
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,1023,0.012788799405097962
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,2047,0.02072799950838089
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,2047,0.01799200028181076
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,2047,0.014660799503326416
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,2047,0.016847999393939973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,2047,0.014563199877738953
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,2047,0.014553600549697876
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,2047,0.014636799693107605
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,2047,0.01666080057621002
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,2047,0.014611199498176575
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,2047,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,2047,0.01255359947681427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,2047,0.014734399318695069
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,2047,0.012587200105190276
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,2047,0.012425599992275238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,4095,0.02879199981689453
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,4095,0.020695999264717102
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,4095,0.018646399676799773
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,4095,0.018639999628067016
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,4095,0.016652800142765045
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,4095,0.016726399958133697
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,4095,0.01663679927587509
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,4095,0.020691199600696562
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,4095,0.017692799866199493
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,4095,0.016625599563121797
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,4095,0.016577599942684172
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,4095,0.014764800667762756
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,4095,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,4095,0.01478080004453659
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,8191,0.024460799992084503
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,8191,0.030771198868751525
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,8191,0.022203199565410614
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,8191,0.018680000305175783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,8191,0.01857919991016388
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,8191,0.020619200170040132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,8191,0.018617600202560425
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,8191,0.022115199267864226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,8191,0.02280319929122925
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,8191,0.01674560010433197
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,8191,0.01857919991016388
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,8191,0.016780799627304076
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,8191,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,8191,0.01653279960155487
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,16383,0.02905920147895813
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,16383,0.031057599186897277
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,16383,0.03502399921417236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,16383,0.02479040026664734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,16383,0.024803200364112855
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,16383,0.022763200104236603
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,16383,0.022673599421977997
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,16383,0.024771200120449068
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,16383,0.029388800263404846
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,16383,0.024784000217914583
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,16383,0.020588800311088562
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,16383,0.01865759938955307
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,16383,0.01879359930753708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,16383,0.018726399540901183
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,32767,0.049353599548339844
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,32767,0.05221440196037293
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,32767,0.05370879769325256
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,32767,0.048767998814582825
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,32767,0.0485071986913681
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,32767,0.04748159945011139
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,32767,0.04905759990215301
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,32767,0.031446400284767154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,32767,0.03344799876213074
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,32767,0.031251201033592226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,32767,0.027427199482917785
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,32767,0.026752001047134398
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,32767,0.02678079903125763
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,32767,0.0248416006565094
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,65535,0.0757535994052887
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,65535,0.07845759987831116
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,65535,0.09037439823150635
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,65535,0.07212640047073364
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,65535,0.07203519940376282
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,65535,0.07256799936294556
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,65535,0.0722432017326355
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,65535,0.04961279928684235
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,65535,0.04850879907608032
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,65535,0.05081760287284851
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,65535,0.04307200014591217
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,65535,0.043137601017951964
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,65535,0.04124639928340912
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,65535,0.042991998791694644
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,131071,0.1261728048324585
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,131071,0.13490400314331055
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,131071,0.1588912010192871
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,131071,0.12277439832687378
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,131071,0.12125439643859863
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,131071,0.12148319482803345
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,131071,0.12131839990615845
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,131071,0.0771727979183197
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,131071,0.07459999918937683
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,131071,0.08269760012626648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,131071,0.06376320123672485
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,131071,0.06223199963569641
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,131071,0.06383519768714904
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,131071,0.06378399729728698
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,1,0.012732799351215362
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,1,0.012545600533485413
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,1,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,1,0.012460800260305405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,1,0.012537600100040435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,1,0.012283200025558471
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,1,0.010552000254392624
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,1,0.01308639943599701
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,1,0.01255359947681427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,1,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,1,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,1,0.011240000277757645
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,1,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,1,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,3,0.01297439932823181
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,3,0.012409599870443344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,3,0.012598399817943574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,3,0.012352000176906585
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,3,0.012390399724245072
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,3,0.0124208003282547
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,3,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,3,0.012567999958992004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,3,0.012627199292182922
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,3,0.010590399801731109
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,3,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,3,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,3,0.010596799850463866
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,3,0.010582400113344192
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,7,0.012617599964141846
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,7,0.0126351997256279
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,7,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,7,0.0124208003282547
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,7,0.011740799993276596
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,7,0.012387199699878693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,7,0.01055999994277954
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,7,0.013811199367046357
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,7,0.01260959953069687
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,7,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,7,0.01058719977736473
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,7,0.010599999874830245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,7,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,7,0.01053439974784851
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,15,0.012596799433231354
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,15,0.012417600303888322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,15,0.012558400630950928
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,15,0.01241919994354248
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,15,0.012414400279521943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,15,0.011584000289440155
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,15,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,15,0.012624000012874604
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,15,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,15,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,15,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,15,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,15,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,15,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,31,0.012494400143623352
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,31,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,31,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,31,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,31,0.012435200065374375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,31,0.012358400225639343
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,31,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,31,0.012563200294971466
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,31,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,31,0.01231359988451004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,31,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,31,0.01053439974784851
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,31,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,31,0.010576000064611435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,63,0.012630400061607362
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,63,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,63,0.012508800625801087
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,63,0.0125231996178627
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,63,0.012403199821710587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,63,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,63,0.012403199821710587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,63,0.012443199753761292
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,63,0.010742399841547012
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,63,0.01055999994277954
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,63,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,63,0.010583999752998351
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,63,0.010764800012111664
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,63,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,127,0.012641599774360657
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,127,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,127,0.012718400359153748
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,127,0.012392000108957291
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,127,0.011379200220108032
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,127,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,127,0.010713600367307664
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,127,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,127,0.012417600303888322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,127,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,127,0.010913600027561188
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,127,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,127,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,127,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,255,0.012857599556446076
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,255,0.012478400021791458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,255,0.012401600182056428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,255,0.012439999729394913
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,255,0.01053600013256073
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,255,0.011044800281524658
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,255,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,255,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,255,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,255,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,255,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,255,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,255,0.010387200117111205
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,511,0.01658399999141693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,255,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,511,0.014545600116252898
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,511,0.014467200636863709
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,511,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,511,0.014603200554847717
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,511,0.014428800344467163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,511,0.012556800246238708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,511,0.012492799758911132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,511,0.012759999930858612
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,511,0.012548799812793731
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,511,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,511,0.01241919994354248
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,511,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,511,0.012366399914026261
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,1023,0.020692799985408784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,1023,0.016684800386428833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,1023,0.016590400040149687
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,1023,0.01462399959564209
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,1023,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,1023,0.014593599736690522
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,1023,0.014564800262451171
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,1023,0.01661120057106018
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,1023,0.01470080018043518
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,1023,0.014606399834156037
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,1023,0.012542399764060973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,1023,0.012591999769210816
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,1023,0.012591999769210816
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,2047,0.026830399036407472
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,2047,0.02070239931344986
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,1023,0.012441600114107132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,2047,0.018518400192260743
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,2047,0.016675199568271636
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,2047,0.0166143998503685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,2047,0.01664479970932007
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,2047,0.01658080071210861
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,2047,0.016607999801635742
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,2047,0.02073120027780533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,2047,0.014575999975204468
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,2047,0.01443679928779602
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,2047,0.014027200639247894
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,2047,0.012516799569129943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,2047,0.014267200231552124
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,4095,0.022646400332450866
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,4095,0.02890079915523529
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,4095,0.02083200067281723
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,4095,0.018651199340820313
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,4095,0.018590399622917177
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,4095,0.01851679980754852
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,4095,0.016616000235080718
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,4095,0.020678399503231047
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,4095,0.02072640061378479
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,4095,0.01669279932975769
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,4095,0.016663999855518342
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,4095,0.016545599699020384
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,4095,0.014742399752140044
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,4095,0.014619199931621552
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,8191,0.028886398673057555
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,8191,0.02919679880142212
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,8191,0.03238719999790192
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,8191,0.022193600237369538
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,8191,0.022542400658130644
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,8191,0.02067520022392273
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,8191,0.020660799741744996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,8191,0.024803200364112855
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,8191,0.0248879998922348
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,8191,0.0226623997092247
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,8191,0.019596800208091736
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,8191,0.01669279932975769
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,8191,0.016628800332546233
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,8191,0.018542400002479552
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,16383,0.04892959892749786
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,16383,0.04952319860458374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,16383,0.05631200075149536
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,16383,0.04535520076751709
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,16383,0.0458624005317688
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,16383,0.04567359983921051
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,16383,0.04531520009040833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,16383,0.03160640001296997
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,16383,0.028998398780822755
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,16383,0.03177280128002167
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,16383,0.02465119957923889
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,16383,0.02481919974088669
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,16383,0.022801600396633148
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,16383,0.024911999702453613
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,32767,0.07527679800987244
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,32767,0.07522079944610596
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,32767,0.06901119947433472
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,32767,0.09288480281829833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,32767,0.06916319727897643
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,32767,0.06875680088996887
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,32767,0.06911680102348328
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,32767,0.04883359968662262
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,32767,0.046953600645065305
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,32767,0.05226399898529053
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,32767,0.04003520011901855
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,32767,0.039155200123786926
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,32767,0.0393999993801117
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,32767,0.0394463986158371
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,65535,0.12614719867706298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,65535,0.12924959659576415
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,65535,0.16405119895935058
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,65535,0.11829279661178589
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,65535,0.11865760087966919
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,65535,0.11884000301361083
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,65535,0.1179695963859558
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,65535,0.0780672013759613
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,65535,0.07190399765968322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,65535,0.061243200302124025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,65535,0.08298879861831665
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,65535,0.060977602005004884
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,65535,0.060171198844909665
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,65535,0.05973759889602661
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,131071,0.23205919265747071
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,131071,0.23727519512176515
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,131071,0.2995136022567749
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,131071,0.21776158809661866
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,131071,0.21958560943603517
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,131071,0.21932001113891603
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,131071,0.2169408082962036
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,131071,0.13444639444351197
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,131071,0.11725120544433594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,131071,0.10393439531326294
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,131071,0.10469759702682495
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,131071,0.14971840381622314
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,1,0.014788800477981567
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,1,0.012617599964141846
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,131071,0.10276960134506226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,131071,0.10271999835968018
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,1,0.012574400007724761
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,1,0.012390399724245072
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,1,0.010857599973678588
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,1,0.01239679977297783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,1,0.01242400035262108
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,1,0.01465120017528534
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,1,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,1,0.012403199821710587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,1,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,1,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,1,0.01056319996714592
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,1,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,3,0.014595200121402741
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,3,0.012494400143623352
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,3,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,3,0.012460800260305405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,3,0.012433599680662155
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,3,0.010937599837779999
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,3,0.012372799962759019
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,3,0.012507200241088867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,3,0.014390400052070618
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,3,0.014289599657058717
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,3,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,3,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,3,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,3,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,7,0.01462240070104599
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,7,0.012593600153923034
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,7,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,7,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,7,0.011209599673748016
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,7,0.01244800016283989
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,7,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,7,0.013998399674892425
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,7,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,7,0.012449599802494049
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,7,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,7,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,7,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,7,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,15,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,15,0.01268640011548996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,15,0.012539200484752655
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,15,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,15,0.012507200241088867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,15,0.012435200065374375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,15,0.011097600311040878
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,15,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,15,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,15,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,15,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,15,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,15,0.01058880016207695
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,15,0.010579200088977813
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,31,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,31,0.01438400000333786
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,31,0.012567999958992004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,31,0.012399999797344208
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,31,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,31,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,31,0.010644800215959548
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,31,0.012619200348854064
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,31,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,31,0.012452799826860428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,31,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,31,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,31,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,31,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,63,0.014539200067520141
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,63,0.012611199915409089
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,63,0.01247519999742508
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,63,0.01276479959487915
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,63,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,63,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,63,0.012438400089740754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,63,0.01281599998474121
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,63,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,63,0.012950399518013
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,63,0.012401600182056428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,63,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,63,0.01061440035700798
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,63,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,127,0.014548799395561219
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,127,0.013843199610710144
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,127,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,127,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,127,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,127,0.01249919980764389
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,127,0.012478400021791458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,127,0.012691199779510498
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,127,0.012567999958992004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,127,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,127,0.010543999820947647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,127,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,127,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,127,0.010620799660682679
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,255,0.014531199634075165
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,255,0.012583999335765839
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,255,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,255,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,255,0.012401600182056428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,255,0.011180800199508668
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,255,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,255,0.01353919953107834
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,255,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,255,0.01239359974861145
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,255,0.011999999731779098
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,255,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,255,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,255,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,511,0.018606400489807128
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,511,0.016616000235080718
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,511,0.015643200278282164
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,511,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,511,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,511,0.014422400295734406
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,511,0.014539200067520141
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,511,0.014603200554847717
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,511,0.014398400485515595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,511,0.012566399574279786
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,511,0.012430399656295776
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,511,0.01247519999742508
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,511,0.012569600343704223
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,511,0.012600000202655792
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,1023,0.02481440007686615
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,1023,0.0206496000289917
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,1023,0.016689600050449373
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,1023,0.01656640022993088
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,1023,0.014567999541759491
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,1023,0.01648640036582947
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,1023,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,1023,0.018675200641155243
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,1023,0.016627199947834015
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,1023,0.014601600170135499
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,1023,0.014575999975204468
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,1023,0.01263359934091568
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,1023,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,1023,0.012598399817943574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,2047,0.020766399800777435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,2047,0.02696320116519928
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,2047,0.01855199933052063
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,2047,0.020751999318599702
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,2047,0.018559999763965607
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,2047,0.017241600155830383
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,2047,0.018588800728321076
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,2047,0.018665599822998046
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,2047,0.020718400180339814
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,2047,0.01659359931945801
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,2047,0.014672000706195832
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,2047,0.014550399780273438
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,2047,0.014446400105953217
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,2047,0.014556799829006196
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,4095,0.026796799898147584
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,4095,0.02881920039653778
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,4095,0.029899200797080992
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,4095,0.02083040028810501
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,4095,0.020734399557113647
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,4095,0.020681600272655486
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,4095,0.02051679939031601
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,4095,0.022758400440216063
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,4095,0.02284640073776245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,4095,0.02070080041885376
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,4095,0.016828800737857818
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,4095,0.016652800142765045
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,4095,0.016655999422073364
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,4095,0.01659359931945801
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,8191,0.04763039946556091
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,8191,0.050886398553848265
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,8191,0.05414239764213562
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,8191,0.04459519982337952
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,8191,0.04446719884872437
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,8191,0.04499680101871491
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,8191,0.04374080002307892
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,8191,0.03173440098762512
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,8191,0.028940799832344054
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,8191,0.029267200827598573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,8191,0.022668799757957457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,8191,0.022753599286079406
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,8191,0.022617599368095397
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,8191,0.020848000049591066
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,16383,0.07709280252456666
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,16383,0.07575520277023315
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,16383,0.09315040111541747
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,16383,0.07023680210113525
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,16383,0.07257279753684998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,16383,0.07010080218315125
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,16383,0.06861919760704041
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,16383,0.05023999810218811
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,16383,0.04779039919376373
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,16383,0.0500927984714508
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,16383,0.039108800888061526
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,16383,0.04069119989871979
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,16383,0.03916000127792359
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,16383,0.038736000657081604
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,32767,0.13320480585098265
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,32767,0.12871840000152587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,32767,0.17068320512771606
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,32767,0.12058559656143189
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,32767,0.12121599912643433
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,32767,0.11918879747390747
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,32767,0.11922080516815185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,32767,0.08118399977684021
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,32767,0.06982560157775879
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,32767,0.08651360273361205
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,32767,0.06114879846572876
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,32767,0.06029760241508484
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,32767,0.06170880198478699
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,32767,0.05986080169677734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,65535,0.24444799423217772
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,65535,0.22825920581817627
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,65535,0.3211008071899414
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,65535,0.2181152105331421
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,65535,0.21782560348510743
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,65535,0.21867039203643798
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,65535,0.14247360229492187
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,65535,0.2181999921798706
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,65535,0.11621919870376587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,65535,0.15564800500869752
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,65535,0.10419679880142212
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,65535,0.1024623990058899
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,65535,0.10289119482040406
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,65535,0.10190240144729615
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,131071,0.4700064182281494
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,131071,0.4261023998260498
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,131071,0.630017614364624
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,131071,0.41896638870239256
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,131071,0.40723838806152346
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,131071,0.4059135913848877
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,131071,0.25151040554046633
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,131071,0.20959680080413817
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,131071,0.4183216094970703
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,131071,0.2959088087081909
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,131071,0.18540639877319337
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,131071,0.18776960372924806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,1,0.014478400349617004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,1,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,1,0.012598399817943574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,131071,0.18447840213775635
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,131071,0.18404639959335328
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,1,0.012649600207805634
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,1,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,1,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,1,0.01250240057706833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,1,0.014416000247001648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,1,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,1,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,1,0.012593600153923034
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,1,0.012566399574279786
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,1,0.012513600289821625
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,1,0.01244639977812767
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,3,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,3,0.014670400321483612
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,3,0.014475199580192565
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,3,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,3,0.012647999823093415
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,3,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,3,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,3,0.012567999958992004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,3,0.014590400457382201
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,3,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,3,0.012439999729394913
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,3,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,3,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,3,0.010814400017261505
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,7,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,7,0.014528000354766845
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,7,0.012620800733566284
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,7,0.012571200728416443
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,7,0.012478400021791458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,7,0.012415999919176102
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,7,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,7,0.014593599736690522
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,7,0.012390399724245072
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,7,0.013897599279880523
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,7,0.012392000108957291
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,7,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,7,0.012435200065374375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,7,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,15,0.014619199931621552
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,15,0.014711999893188476
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,15,0.012583999335765839
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,15,0.012692800164222718
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,15,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,15,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,15,0.012507200241088867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,15,0.014467200636863709
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,15,0.012572799623012543
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,15,0.012428800016641617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,15,0.012411200255155564
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,15,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,15,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,15,0.012318400293588638
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,31,0.014641599357128143
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,31,0.014585599303245544
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,31,0.014459200203418732
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,31,0.012572799623012543
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,31,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,31,0.012591999769210816
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,31,0.012614400684833526
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,31,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,31,0.01451359987258911
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,31,0.01266240030527115
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,31,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,31,0.011435200273990632
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,31,0.011953599750995636
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,31,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,63,0.014476799964904785
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,63,0.014655999839305878
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,63,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,63,0.012638400495052337
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,63,0.012567999958992004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,63,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,63,0.012537600100040435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,63,0.014484800398349762
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,63,0.012492799758911132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,63,0.012460800260305405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,63,0.01236959993839264
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,63,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,63,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,63,0.013257600367069244
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,127,0.014609600603580474
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,127,0.014636799693107605
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,127,0.014417600631713868
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,127,0.012617599964141846
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,127,0.01268800050020218
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,127,0.012656000256538392
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,127,0.012595200538635254
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,127,0.013753600418567657
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,127,0.014732800424098969
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,127,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,127,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,127,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,127,0.01069760024547577
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,127,0.012398400157690049
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,255,0.01483200043439865
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,255,0.01432960033416748
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,255,0.012591999769210816
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,255,0.014699199795722961
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,255,0.012627199292182922
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,255,0.012615999579429627
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,255,0.01271200031042099
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,255,0.014646400511264802
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,255,0.01361439973115921
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,255,0.012438400089740754
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,255,0.012383999675512314
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,255,0.01263359934091568
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,255,0.011903999745845795
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,255,0.01104160025715828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,511,0.02082560062408447
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,511,0.01854719966650009
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,511,0.016527999937534333
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,511,0.014660799503326416
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,511,0.014584000408649444
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,511,0.01478240042924881
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,511,0.014611199498176575
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,511,0.019750399887561797
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,511,0.014692799746990204
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,511,0.014270399510860444
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,511,0.012614400684833526
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,511,0.012630400061607362
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,511,0.012628799676895142
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,511,0.012596799433231354
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,1023,0.02282239943742752
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,1023,0.020550400018692017
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,1023,0.0249439999461174
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,1023,0.017054399847984313
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,1023,0.016672000288963318
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,1023,0.016758400201797485
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,1023,0.016788800060749055
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,1023,0.020817600190639496
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,1023,0.018806399405002595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,1023,0.01459999978542328
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,1023,0.016764800250530242
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,1023,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,1023,0.014497600495815277
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,1023,0.014608000218868256
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,2047,0.030963200330734252
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,2047,0.02672480046749115
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,2047,0.02067520022392273
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,2047,0.02707040011882782
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,2047,0.0205935999751091
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,2047,0.019566400349140166
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,2047,0.02008959949016571
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,2047,0.024908800423145295
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,2047,0.0208624005317688
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,2047,0.01674560010433197
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,2047,0.020785599946975708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,2047,0.014643199741840363
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,2047,0.0164000004529953
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,2047,0.014696000516414643
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,4095,0.05129600167274475
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,4095,0.04970400035381317
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,4095,0.05538719892501831
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,4095,0.04267840087413788
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,4095,0.04236960113048553
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,4095,0.04149599969387054
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,4095,0.033228799700737
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,4095,0.04175840020179748
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,4095,0.026694399118423463
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,4095,0.02895359992980957
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,4095,0.020715199410915375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,4095,0.01881439983844757
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,4095,0.02062560021877289
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,4095,0.01884160041809082
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,8191,0.0799456000328064
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,8191,0.07554720044136047
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,8191,0.09431679844856262
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,8191,0.06981920003890991
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,8191,0.06668480038642884
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,8191,0.06584479808807372
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,8191,0.06867200136184692
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,8191,0.051772797107696535
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,8191,0.045470398664474485
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,8191,0.05154719948768616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,8191,0.038176000118255615
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,8191,0.03596799969673157
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,8191,0.03713920116424561
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,8191,0.03658879995346069
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,16383,0.13567839860916137
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,16383,0.13307360410690308
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,16383,0.17133760452270508
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,16383,0.12288960218429565
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,16383,0.11974079608917236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,16383,0.11813440322875976
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,16383,0.11992800235748291
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,16383,0.08295040130615235
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,16383,0.07108960151672364
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,16383,0.08707839846611024
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,16383,0.06141120195388794
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,16383,0.05881119966506958
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,16383,0.059271997213363646
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,16383,0.05802879929542541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,32767,0.24784319400787352
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,32767,0.23376801013946533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,32767,0.32578721046447756
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,32767,0.21978719234466554
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,32767,0.22192161083221434
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,32767,0.2222656011581421
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,32767,0.21890559196472167
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,32767,0.14272639751434327
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,32767,0.12216479778289795
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,32767,0.15907360315322877
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,32767,0.10508960485458374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,32767,0.10448640584945679
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,32767,0.10313600301742554
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,32767,0.103329598903656
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,65535,0.44716801643371584
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,65535,0.4714848041534424
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,65535,0.6291024208068847
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,65535,0.4197199821472168
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,65535,0.4436607837677002
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,65535,0.42352800369262694
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,65535,0.2641151905059814
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,65535,0.41962718963623047
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,65535,0.21782879829406737
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,65535,0.19353599548339845
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,65535,0.29936800003051756
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,65535,0.19101760387420655
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,1,0.016513599455356597
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,1,0.014612799882888794
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,1,0.014585599303245544
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,1,0.012747199833393097
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,1,0.012833599746227265
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,65535,0.19088319540023804
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,65535,0.1911679983139038
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,1,0.012647999823093415
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,1,0.012647999823093415
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,1,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,1,0.016433599591255187
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,1,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,1,0.014500799775123595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,1,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,1,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,1,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,3,0.016505600512027742
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,3,0.014753599464893342
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,3,0.014449599385261535
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,3,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,3,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,3,0.01255359947681427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,3,0.012670400738716125
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,3,0.01652960032224655
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,3,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,3,0.013473600149154663
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,3,0.012460800260305405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,3,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,3,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,7,0.01645440012216568
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,3,0.012398400157690049
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,7,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,7,0.014531199634075165
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,7,0.012571200728416443
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,7,0.014497600495815277
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,7,0.012544000148773193
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,7,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,7,0.01652639955282211
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,7,0.012441600114107132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,7,0.014433600008487701
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,7,0.012411200255155564
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,7,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,7,0.01051200032234192
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,7,0.012443199753761292
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,15,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,15,0.014558400213718414
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,15,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,15,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,15,0.012593600153923034
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,15,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,15,0.012547199428081513
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,15,0.016599999368190767
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,15,0.01446560025215149
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,15,0.012492799758911132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,15,0.012580800056457519
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,15,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,15,0.012366399914026261
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,31,0.016502399742603303
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,31,0.014494399726390838
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,15,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,31,0.01462399959564209
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,31,0.013624000549316406
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,31,0.012558400630950928
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,31,0.013300800323486328
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,31,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,31,0.01655520051717758
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,31,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,31,0.012580800056457519
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,31,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,31,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,31,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,31,0.012387199699878693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,63,0.015211200714111328
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,63,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,63,0.014612799882888794
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,63,0.014256000518798828
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,63,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,63,0.012556800246238708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,63,0.012513600289821625
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,63,0.01656160056591034
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,63,0.012740799784660339
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,63,0.012532800436019897
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,63,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,63,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,63,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,127,0.016443200409412384
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,127,0.014542399346828461
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,63,0.012460800260305405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,127,0.014758400619029999
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,127,0.012563200294971466
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,127,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,127,0.012692800164222718
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,127,0.012515200674533844
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,127,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,127,0.014441600441932679
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,127,0.012598399817943574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,127,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,127,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,127,0.012352000176906585
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,127,0.012353599816560746
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,255,0.015548799932003022
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,255,0.01659200042486191
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,255,0.014655999839305878
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,255,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,255,0.012515200674533844
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,255,0.012670400738716125
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,255,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,255,0.016571199893951415
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,255,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,255,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,255,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,255,0.01244639977812767
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,255,0.011604800075292587
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,255,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,511,0.01937440037727356
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,511,0.022724799811840057
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,511,0.018619200587272643
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,511,0.016513599455356597
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,511,0.01660960018634796
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,511,0.015963199734687804
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,511,0.016564799845218657
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,511,0.018662400543689728
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,511,0.014692799746990204
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,511,0.02070080041885376
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,511,0.014484800398349762
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,511,0.012588800489902496
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,511,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,511,0.012571200728416443
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,1023,0.02680639922618866
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,1023,0.028892800211906433
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,1023,0.028974398970603943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,1023,0.020630399882793426
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,1023,0.01863519996404648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,1023,0.018638400733470915
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,1023,0.01865279972553253
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,1023,0.02117439955472946
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,1023,0.022763200104236603
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,1023,0.018702399730682374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,1023,0.016631999611854555
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,1023,0.014582400023937226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,1023,0.014655999839305878
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,1023,0.01505119949579239
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,2047,0.04741120040416717
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,2047,0.0515824019908905
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,2047,0.05344799757003784
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,2047,0.04323039948940277
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,2047,0.04310719966888428
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,2047,0.041571199893951416
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,2047,0.041249600052833554
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,2047,0.03089439868927002
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,2047,0.028815999627113342
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,2047,0.026849600672721862
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,2047,0.020619200170040132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,2047,0.018615999817848207
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,2047,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,2047,0.018624000251293182
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,4095,0.07417920231819153
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,4095,0.08016319870948792
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,4095,0.09102720022201538
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,4095,0.06827999949455262
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,4095,0.0686784029006958
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,4095,0.06621760129928589
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,4095,0.06569600105285645
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,4095,0.04982399940490723
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,4095,0.04896639883518219
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,4095,0.04940640032291412
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,4095,0.03905119895935059
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,4095,0.03703039884567261
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,4095,0.036478400230407715
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,4095,0.03722400069236755
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,8191,0.12944159507751465
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,8191,0.13679039478302002
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,8191,0.1671872019767761
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,8191,0.12477760314941407
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,8191,0.11945279836654663
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,8191,0.12190719842910766
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,8191,0.1194607973098755
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,8191,0.07949920296669007
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,8191,0.0725488007068634
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,8191,0.08476799726486206
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,8191,0.06109920144081116
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,8191,0.05956799983978271
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,8191,0.05897759795188904
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,8191,0.05892800092697144
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,16383,0.2422111988067627
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,16383,0.2409359931945801
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,16383,0.31959359645843505
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,16383,0.22656641006469727
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,16383,0.22142879962921141
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,16383,0.221232008934021
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,16383,0.22330238819122314
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,16383,0.13528159856796265
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,16383,0.12253600358963013
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,16383,0.155404794216156
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,16383,0.10562720298767089
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,16383,0.10338079929351807
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,16383,0.10445599555969239
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,16383,0.10283199548721314
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,32767,0.44669761657714846
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,32767,0.4620816230773926
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,32767,0.6207183837890625
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,32767,0.4230991840362549
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,32767,0.42366719245910645
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,32767,0.4194943904876709
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,32767,0.2544719934463501
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,32767,0.4217264175415039
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,32767,0.2201456069946289
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,32767,0.2975600004196167
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,32767,0.19415839910507202
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,32767,0.19148800373077393
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,32767,0.19151519536972045
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,1,0.021031999588012697
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,1,0.016633599996566772
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,1,0.014545600116252898
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,1,0.020721599459648132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,1,0.014601600170135499
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,1,0.01451359987258911
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,1,0.014683200418949128
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,1,0.022728000581264497
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,32767,0.1906767964363098
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,1,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,1,0.018438400328159334
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,1,0.012515200674533844
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,1,0.012620800733566284
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,1,0.012507200241088867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,1,0.012851199507713318
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,3,0.020766399800777435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,3,0.020715199410915375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,3,0.016689600050449373
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,3,0.014614400267601014
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,3,0.014460800588130951
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,3,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,3,0.014478400349617004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,3,0.022672000527381896
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,3,0.016599999368190767
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,3,0.018628799915313722
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,3,0.012574400007724761
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,3,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,3,0.012508800625801087
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,3,0.012518399953842163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,7,0.016726399958133697
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,7,0.0225040003657341
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,7,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,7,0.020703999698162077
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,7,0.01449120044708252
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,7,0.014643199741840363
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,7,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,7,0.022644799947738648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,7,0.018632000684738158
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,7,0.016627199947834015
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,7,0.012547199428081513
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,7,0.012516799569129943
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,7,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,15,0.02080959975719452
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,7,0.012537600100040435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,15,0.016654400527477263
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,15,0.020771199464797975
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,15,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,15,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,15,0.014496000111103058
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,15,0.02271520048379898
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,15,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,15,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,15,0.01860000044107437
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,15,0.012571200728416443
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,15,0.012510399520397186
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,15,0.01244639977812767
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,15,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,31,0.02099519968032837
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,31,0.016732800006866454
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,31,0.020763200521469117
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,31,0.014560000598430633
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,31,0.014590400457382201
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,31,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,31,0.014542399346828461
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,31,0.02265920042991638
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,31,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,31,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,31,0.012716799974441528
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,31,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,31,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,31,0.012406399846076966
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,63,0.02165919989347458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,63,0.0166703999042511
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,63,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,63,0.020579199492931365
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,63,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,63,0.014526399970054626
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,63,0.014460800588130951
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,63,0.022700800001621245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,63,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,63,0.018435199558734894
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,63,0.01252799928188324
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,63,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,63,0.012537600100040435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,63,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,127,0.020694400370121
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,127,0.016672000288963318
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,127,0.014496000111103058
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,127,0.020654399693012238
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,127,0.014684799313545226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,127,0.01449279934167862
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,127,0.014574399590492249
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,127,0.022806400060653688
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,127,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,127,0.018648000061511995
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,127,0.014232000708580017
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,127,0.01249919980764389
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,127,0.01250240057706833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,127,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,255,0.026710399985313417
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,255,0.01868959963321686
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,255,0.014561599493026734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,255,0.02059199959039688
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,255,0.014534400403499603
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,255,0.014561599493026734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,255,0.01446399986743927
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,255,0.01669279932975769
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,255,0.024707199633121492
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,255,0.01807360053062439
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,255,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,255,0.012366399914026261
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,255,0.012513600289821625
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,255,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,511,0.026921600103378296
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,511,0.03760960102081299
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,511,0.030929601192474364
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,511,0.018638400733470915
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,511,0.018671999871730804
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,511,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,511,0.01860000044107437
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,511,0.02887200117111206
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,511,0.02059520035982132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,511,0.022067199647426605
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,511,0.014630399644374847
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,511,0.014563199877738953
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,511,0.014558400213718414
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,1023,0.052667200565338135
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,511,0.01440960019826889
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,1023,0.04937280118465424
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,1023,0.055105602741241454
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,1023,0.04155359864234924
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,1023,0.041468799114227295
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,1023,0.04116640090942383
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,1023,0.041308799386024476
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,1023,0.0391728013753891
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,1023,0.026870399713516235
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,1023,0.02037599980831146
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,1023,0.018673600256443025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,1023,0.030100798606872557
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,1023,0.018503999710083006
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,1023,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,2047,0.08381440043449402
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,2047,0.07530400156974792
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,2047,0.0954367995262146
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,2047,0.06609759926795959
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,2047,0.06667519807815551
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,2047,0.06564639806747437
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,2047,0.06584320068359376
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,2047,0.05555199980735779
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,2047,0.045326399803161624
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,2047,0.05222560167312622
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,2047,0.03714880049228668
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,2047,0.035417601466178894
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,2047,0.0358240008354187
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,2047,0.03561759889125824
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,4095,0.13275519609451295
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,4095,0.13974399566650392
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,4095,0.12187360525131226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,4095,0.17244479656219483
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,4095,0.1203279972076416
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,4095,0.1197775959968567
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,4095,0.11955360174179078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,4095,0.08826720118522643
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,4095,0.07146080136299134
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,4095,0.089547199010849
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,4095,0.059867197275161745
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,4095,0.05828319787979126
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,4095,0.05865600109100342
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,4095,0.058217602968215945
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,8191,0.2518032073974609
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,8191,0.23698720932006836
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,8191,0.32667520046234133
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,8191,0.2216495990753174
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,8191,0.22048959732055665
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,8191,0.2187040090560913
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,8191,0.21883039474487304
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,8191,0.12208800315856934
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,8191,0.150108802318573
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,8191,0.15998560190200806
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,8191,0.10375200510025025
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,8191,0.10237280130386353
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,8191,0.10258239507675171
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,8191,0.10174720287322998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,16383,0.4766863822937012
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,16383,0.44142880439758303
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,16383,0.6277967929840088
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,16383,0.4163792133331299
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,16383,0.4327888011932373
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,16383,0.41211519241333006
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,16383,0.27123520374298093
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,16383,0.4150415897369385
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,16383,0.21859359741210938
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,16383,0.1922991991043091
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,16383,0.30186560153961184
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,16383,0.19021120071411132
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,1,0.037191998958587644
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,1,0.024803200364112855
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,1,0.03110400140285492
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,1,0.020656000077724456
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,16383,0.18904800415039064
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,16383,0.1892032027244568
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,1,0.02080959975719452
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,1,0.018779200315475465
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,1,0.023875199258327484
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,1,0.019827200472354888
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,1,0.035743999481201175
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,1,0.02667680084705353
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,1,0.018726399540901183
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,1,0.01661760061979294
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,1,0.016564799845218657
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,1,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,3,0.024864000082015992
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,3,0.037273600697517395
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,3,0.020614400506019592
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,3,0.03141599893569946
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,3,0.020615999400615693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,3,0.020587199926376344
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,3,0.019435200095176696
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,3,0.03668160140514374
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,3,0.02451840043067932
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,3,0.018622399866580965
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,3,0.016758400201797485
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,3,0.02686559855937958
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,3,0.016616000235080718
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,3,0.01659359931945801
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,7,0.03702239990234375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,7,0.024851199984550477
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,7,0.030937600135803222
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,7,0.020683200657367708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,7,0.020628799498081208
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,7,0.020703999698162077
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,7,0.02048799991607666
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,7,0.024556800723075867
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,7,0.035209599137306216
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,7,0.02680320143699646
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,7,0.01863519996404648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,7,0.01658399999141693
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,7,0.016603200137615202
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,7,0.016612799465656282
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,15,0.02489120066165924
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,15,0.03716000020503998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,15,0.031036800146102904
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,15,0.020777599513530733
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,15,0.020444799959659577
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,15,0.020732800662517547
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,15,0.020475199818611144
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,15,0.024281600117683412
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,15,0.03526880145072937
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,15,0.018612800538539885
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,15,0.0267984002828598
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,15,0.016684800386428833
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,15,0.016622400283813475
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,15,0.016540800034999848
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,31,0.02481440007686615
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,31,0.03881919980049133
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,31,0.03272959887981415
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,31,0.020948800444602966
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,31,0.020785599946975708
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,31,0.02067680060863495
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,31,0.020667199790477753
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,31,0.037041598558425905
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,31,0.022756800055503845
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,31,0.026726400852203368
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,31,0.018665599822998046
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,31,0.01659200042486191
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,31,0.01660960018634796
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,31,0.016568000614643096
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,63,0.024903999269008638
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,63,0.039155200123786926
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,63,0.032313600182533264
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,63,0.020715199410915375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,63,0.020689600706100465
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,63,0.02066880017518997
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,63,0.020732800662517547
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,63,0.03721440136432648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,63,0.02396959960460663
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,63,0.026817598938941957
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,63,0.01863040030002594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,63,0.016748799383640288
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,63,0.016628800332546233
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,63,0.016681599617004394
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,127,0.02683520019054413
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,127,0.03914720118045807
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,127,0.033000001311302186
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,127,0.020777599513530733
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,127,0.02056639939546585
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,127,0.020662400126457214
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,127,0.020609599351882935
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,127,0.037145599722862244
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,127,0.026929599046707154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,127,0.02282399982213974
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,127,0.01873600035905838
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,127,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,127,0.016676799952983858
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,127,0.016607999801635742
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,255,0.03760800063610077
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,255,0.047302401065826415
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,255,0.0371071994304657
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,255,0.02269120067358017
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,255,0.02073120027780533
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,255,0.019108800590038298
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,255,0.020796799659729005
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,255,0.026894399523735048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,255,0.04113439917564392
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,255,0.026897600293159483
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,255,0.018568000197410582
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,255,0.016607999801635742
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,255,0.016572800278663636
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,255,0.016711999475955964
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,511,0.0634768009185791
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,511,0.053825598955154416
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,511,0.059716802835464475
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,511,0.04360319972038269
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,511,0.04210880100727081
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,511,0.04135040044784546
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,511,0.04140479862689972
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,511,0.0513759970664978
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,511,0.03697279989719391
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,511,0.022435200214385987
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,511,0.037143999338150026
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,511,0.020670400559902193
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,511,0.020289599895477295
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,511,0.020759999752044678
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,1023,0.09274399876594544
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,1023,0.08329439759254456
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,1023,0.0987775981426239
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,1023,0.06856319904327393
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,1023,0.06763839721679688
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,1023,0.06664320230484008
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,1023,0.06633440256118775
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,1023,0.06755040287971496
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,1023,0.053711998462677005
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,1023,0.05602880120277405
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,1023,0.0396016001701355
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,1023,0.037329599261283875
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,1023,0.03665600121021271
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,1023,0.03728480041027069
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,2047,0.15202080011367797
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,2047,0.1390768051147461
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,2047,0.177510404586792
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,2047,0.12210559844970703
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,2047,0.11951680183410644
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,2047,0.1189728021621704
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,2047,0.11776160001754761
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,2047,0.09913120269775391
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,2047,0.08085119724273682
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,2047,0.09457600116729736
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,2047,0.06454560160636902
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,2047,0.06155679821968078
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,2047,0.06013759970664978
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,2047,0.06004959940910339
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,4095,0.2661936044692993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,4095,0.2514031887054443
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,4095,0.329584002494812
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,4095,0.23003358840942384
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,4095,0.22642719745635986
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,4095,0.2276479959487915
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,4095,0.22551360130310058
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,4095,0.13250399827957154
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,4095,0.1669152021408081
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,4095,0.16734880208969116
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,4095,0.10928159952163696
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,4095,0.1071552038192749
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,4095,0.10637600421905517
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,4095,0.10459359884262084
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,8191,0.4902160167694092
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,8191,0.46335840225219727
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,8191,0.630072021484375
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,8191,0.42852320671081545
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,8191,0.43448481559753416
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,8191,0.42293438911437986
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,8191,0.28780479431152345
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,8191,0.42644319534301756
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,8191,0.2290112018585205
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,8191,0.19843839406967162
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,8191,0.30998079776763915
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,8191,0.1939360022544861
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,1,0.06381120085716248
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,8191,0.19298880100250243
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,1,0.04324159920215607
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,8191,0.19322880506515502
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,1,0.0329584002494812
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,1,0.051857602596282956
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,1,0.02889440059661865
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,1,0.03012000024318695
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,1,0.029841598868370057
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,1,0.06527040004730225
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,1,0.03916319906711578
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,1,0.04341920018196106
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,1,0.02678399980068207
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,1,0.022814400494098663
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,1,0.024872000515460967
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,1,0.023228800296783446
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,3,0.043201598525047305
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,3,0.06391519904136658
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,3,0.0515824019908905
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,3,0.031934401392936705
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,3,0.02900480031967163
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,3,0.02918879985809326
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,3,0.03911679983139038
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,3,0.031057599186897277
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,3,0.06472480297088623
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,3,0.0433135986328125
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,3,0.026974400877952574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,3,0.022724799811840057
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,3,0.024699200689792634
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,3,0.022937600314617158
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,7,0.06405760049819946
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,7,0.03293600082397461
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,7,0.04333440065383911
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,7,0.05157439708709717
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,7,0.030881598591804504
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,7,0.028968000411987306
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,7,0.03086400032043457
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,7,0.03912000060081482
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,7,0.06559519767761231
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,7,0.04317759871482849
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,7,0.026819199323654175
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,7,0.024676799774169922
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,7,0.023179200291633607
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,7,0.023684799671173096
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,15,0.0645632028579712
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,15,0.04336160123348236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,15,0.05157279968261719
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,15,0.03293119966983795
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,15,0.03083199858665466
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,15,0.030935999751091004
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,15,0.030902400612831116
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,15,0.0654528021812439
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,15,0.039094400405883786
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,15,0.04328800141811371
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,15,0.026907199621200563
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,15,0.02483839988708496
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,15,0.02282720059156418
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,15,0.022804799675941467
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,31,0.06559200286865234
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,31,0.04521119892597199
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,31,0.05272799730300903
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,31,0.03289119899272919
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,31,0.030904000997543334
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,31,0.03089759945869446
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,31,0.030297601222991945
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,31,0.06575359702110291
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,31,0.026796799898147584
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,31,0.04319039881229401
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,31,0.0395552009344101
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,31,0.02479359954595566
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,31,0.022787199914455415
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,31,0.02359199970960617
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,63,0.045403200387954715
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,63,0.06568319797515869
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,63,0.03296639919281006
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,63,0.053718400001525876
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,63,0.03091999888420105
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,63,0.030959999561309813
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,63,0.030983999371528625
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,63,0.0657584011554718
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,63,0.041193601489067075
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,63,0.043489599227905275
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,63,0.024716800451278685
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,63,0.026844799518585205
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,63,0.023020799458026885
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,63,0.02285120040178299
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,127,0.0677183985710144
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,127,0.04947519898414612
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,127,0.033344000577926636
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,127,0.056296002864837644
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,127,0.03094879984855652
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,127,0.030921599268913268
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,127,0.030881598591804504
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,127,0.06581760048866273
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,127,0.042375999689102176
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,127,0.04446400105953217
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,127,0.026982399821281432
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,127,0.02479359954595566
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,127,0.024753600358963013
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,127,0.024639999866485594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,255,0.0839680016040802
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,255,0.060443198680877684
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,255,0.06536319851875305
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,255,0.04722079932689667
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,255,0.0432096004486084
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,255,0.04241760075092316
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,255,0.04104000031948089
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,255,0.07031360268592834
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,255,0.05143359899520874
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,255,0.04658080041408539
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,255,0.028457599878311157
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,255,0.024750399589538574
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,255,0.024438400566577912
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,255,0.02295839935541153
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,511,0.11016960144042968
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,511,0.09592959880828858
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,511,0.09724000096321106
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,511,0.07514879703521729
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,511,0.07203999757766724
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,511,0.07174400091171265
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,511,0.07012799978256226
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,511,0.08529120087623596
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,511,0.06560959815979003
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,511,0.061737602949142455
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,511,0.04426400065422058
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,511,0.0411296010017395
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,511,0.03974240124225616
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,511,0.039175999164581296
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,1023,0.16297760009765624
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,1023,0.15164480209350586
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,1023,0.16919519901275634
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,1023,0.12453440427780152
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,1023,0.12226879596710205
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,1023,0.12047679424285888
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,1023,0.11861120462417603
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,1023,0.1148800015449524
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,1023,0.0924448013305664
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,1023,0.09428160190582276
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,1023,0.0682752013206482
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,1023,0.06372640132904053
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,1023,0.0629360020160675
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,1023,0.06193280220031738
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,2047,0.27009119987487795
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,2047,0.2584944009780884
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,2047,0.30771679878234864
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,2047,0.22848160266876222
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,2047,0.22201600074768066
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,2047,0.2224992036819458
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,2047,0.21886401176452636
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,2047,0.17100000381469727
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,2047,0.14646400213241578
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,2047,0.11492960453033448
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,2047,0.16079679727554322
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,2047,0.10931680202484131
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,2047,0.10900800228118897
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,2047,0.108024001121521
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,4095,0.47734880447387695
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,4095,0.491648006439209
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,4095,0.44683518409729006
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,4095,0.5769951820373536
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,4095,0.43716797828674314
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,4095,0.43920321464538575
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,4095,0.28348960876464846
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,4095,0.43772001266479493
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,4095,0.2511888027191162
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,4095,0.28878719806671144
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,4095,0.2049567937850952
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,4095,0.2010159969329834
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,1,0.0764303982257843
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,1,0.11598559617996215
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,4095,0.1985535979270935
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,1,0.09657760262489319
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,4095,0.19880640506744385
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,1,0.05327519774436951
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,1,0.049039998650550844
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,1,0.047367998957633974
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,1,0.04733439981937408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,1,0.07049760222434998
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,1,0.11914240121841431
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,1,0.04520159959793091
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,1,0.07805920243263245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,1,0.03913280069828033
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,1,0.03706879913806915
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,1,0.03706560134887695
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,3,0.11725120544433594
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,3,0.07688800096511841
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,3,0.09712479710578918
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,3,0.053548800945281985
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,3,0.047444799542427064
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,3,0.049327999353408813
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,3,0.04731839895248413
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,3,0.07191200256347656
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,3,0.11915680170059204
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,3,0.07805920243263245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,3,0.045332801342010495
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,3,0.038838401436805725
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,3,0.03703519999980927
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,3,0.037028801441192624
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,7,0.1168239951133728
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,7,0.07607200145721435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,7,0.09655839800834656
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,7,0.05347040295600891
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,7,0.04915519952774048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,7,0.04740639925003052
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,7,0.04734559953212738
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,7,0.07127360105514527
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,7,0.11906720399856567
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,7,0.07814239859580993
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,7,0.039099198579788205
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,7,0.037136000394821164
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,7,0.04523360133171082
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,7,0.03701600134372711
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,15,0.07677119970321655
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,15,0.11799039840698242
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,15,0.053617602586746214
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,15,0.09822239875793456
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,15,0.04934079945087433
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,15,0.04770239889621734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,15,0.04732640087604523
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,15,0.11899679899215698
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,15,0.07192320227622986
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,15,0.07804160118103028
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,15,0.045238399505615236
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,15,0.039032000303268435
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,15,0.037064000964164734
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,15,0.037124800682067874
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,31,0.11897120475769044
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,31,0.07615839838981628
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,31,0.09784799814224243
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,31,0.05558239817619324
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,31,0.049414399266242984
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,31,0.04732480049133301
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,31,0.04740799963474274
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,31,0.11951999664306641
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,31,0.07187839746475219
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,31,0.07854239940643311
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,31,0.045259198546409606
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,31,0.03907040059566498
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,31,0.037212800979614255
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,31,0.0369951993227005
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,63,0.07817919850349427
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,63,0.11798880100250245
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,63,0.09673439860343933
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,63,0.05783519744873047
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,63,0.050393599271774295
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,63,0.04851999878883362
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,63,0.048295998573303224
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,63,0.07197279930114746
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,63,0.11919519901275635
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,63,0.08033760190010071
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,63,0.045244801044464114
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,63,0.03910239934921265
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,63,0.03704800009727478
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,63,0.03705439865589142
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,127,0.08654239773750305
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,127,0.12019200325012207
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,127,0.09867200255393982
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,127,0.06475520133972168
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,127,0.05683519840240479
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,127,0.05553600192070007
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,127,0.055086398124694826
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,127,0.11915359497070313
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,127,0.07196639776229859
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,127,0.08032479882240295
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,127,0.04932160079479218
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,127,0.04066239893436432
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,127,0.03728159964084625
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,127,0.037187200784683225
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,255,0.14953600168228148
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,255,0.11215519905090332
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,255,0.10701440572738648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,255,0.07678880095481873
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,255,0.07130560278892517
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,255,0.06808000206947326
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,255,0.06692320108413696
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,255,0.1280832052230835
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,255,0.08438879847526551
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,255,0.07862399816513062
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,255,0.05106080174446106
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,255,0.04518879950046539
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,255,0.04185279905796051
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,255,0.041577601432800294
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,511,0.16323839426040648
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,511,0.20183520317077636
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,511,0.1795408010482788
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,511,0.11985599994659424
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,511,0.12085119485855103
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,511,0.13011519908905028
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,511,0.12024799585342408
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,511,0.10882719755172729
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,511,0.16045440435409547
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,511,0.10871360301971436
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,511,0.07245759963989258
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,511,0.06589440107345582
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,511,0.06370400190353394
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,511,0.06204479932785034
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,1023,0.30990560054779054
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,1023,0.2653887987136841
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,1023,0.32073121070861815
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,1023,0.21857919692993164
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,1023,0.21338560581207275
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,1023,0.20772318840026854
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,1023,0.20759520530700684
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,1023,0.21861441135406495
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,1023,0.15859520435333252
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,1023,0.17454880475997925
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,1023,0.11565760374069214
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,1023,0.10807360410690307
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,1023,0.10574400424957275
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,1023,0.10472160577774048
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,2047,0.5201680183410644
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,2047,0.46033601760864257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,2047,0.5963456153869628
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,2047,0.4166848182678223
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,2047,0.39869439601898193
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,2047,0.3954335927963257
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,2047,0.3961119890213013
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,2047,0.33138558864593504
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,2047,0.2592367887496948
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,2047,0.20210399627685546
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,2047,0.1941696047782898
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,2047,0.3040143966674805
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,2047,0.19029279947280883
SGLang,0.5.8.post1,NVIDIA GB200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,2047,0.1870144009590149
